From ef038743f1c015b13287abcb87f7d63717f45b1b Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 18 Jan 2019 14:34:39 +0800 Subject: [PATCH] remove legacy python code --- python/paddle/trainer/PyDataProvider2.py | 541 -- .../paddle/trainer/PyDataProviderWrapper.py | 749 -- python/paddle/trainer/__init__.py | 13 - python/paddle/trainer/config_parser.py | 4447 ---------- .../paddle/trainer/config_parser_extension.py | 39 - python/paddle/trainer/recurrent_units.py | 357 - .../paddle/trainer_config_helpers/__init__.py | 25 - .../trainer_config_helpers/activations.py | 263 - python/paddle/trainer_config_helpers/attrs.py | 291 - .../config_parser_utils.py | 51 - .../trainer_config_helpers/data_sources.py | 213 - .../default_decorators.py | 164 - .../trainer_config_helpers/evaluators.py | 813 -- .../trainer_config_helpers/layer_math.py | 113 - .../paddle/trainer_config_helpers/layers.py | 7610 ----------------- .../paddle/trainer_config_helpers/networks.py | 1813 ---- .../trainer_config_helpers/optimizers.py | 447 - .../paddle/trainer_config_helpers/poolings.py | 148 - .../tests/CMakeLists.txt | 17 - .../tests/ProtobufEqualMain.cpp | 59 - .../tests/configs/.gitignore | 1 - .../tests/configs/file_list.sh | 17 - .../tests/configs/generate_protostr.sh | 27 - .../tests/configs/img_layers.py | 38 - .../tests/configs/img_trans_layers.py | 38 - .../tests/configs/last_first_seq.py | 35 - .../tests/configs/layer_activations.py | 34 - .../tests/configs/math_ops.py | 42 - .../tests/configs/projections.py | 80 - .../configs/protostr/img_layers.protostr | 193 - .../protostr/img_trans_layers.protostr | 193 - .../configs/protostr/last_first_seq.protostr | 102 - .../protostr/layer_activations.protostr | 423 - .../tests/configs/protostr/math_ops.protostr | 413 - .../configs/protostr/projections.protostr | 466 - .../tests/configs/protostr/shared_fc.protostr | 125 - .../configs/protostr/shared_gru.protostr | 289 - .../configs/protostr/shared_lstm.protostr | 385 - .../protostr/simple_rnn_layers.protostr | 424 - .../protostr/test_BatchNorm3D.protostr | 93 - .../protostr/test_bi_grumemory.protostr | 155 - .../protostr/test_bilinear_interp.protostr | 137 - .../configs/protostr/test_clip_layer.protostr | 31 - .../protostr/test_conv3d_layer.protostr | 132 - .../protostr/test_cost_layers.protostr | 375 - .../test_cost_layers_with_weight.protostr | 162 - .../test_cross_entropy_over_beam.protostr | 207 - .../protostr/test_deconv3d_layer.protostr | 132 - .../test_detection_output_layer.protostr | 66 - .../protostr/test_dot_prod_layer.protostr | 38 - .../protostr/test_expand_layer.protostr | 56 - .../test_factorization_machine.protostr | 39 - .../tests/configs/protostr/test_fc.protostr | 98 - .../protostr/test_gated_unit_layer.protostr | 106 - .../protostr/test_grumemory_layer.protostr | 51 - .../configs/protostr/test_hsigmoid.protostr | 62 - .../test_kmax_seq_socre_layer.protostr | 59 - .../protostr/test_l2_distance_layer.protostr | 39 - .../protostr/test_lstmemory_layer.protostr | 53 - .../configs/protostr/test_maxout.protostr | 233 - .../test_multibox_loss_layer.protostr | 79 - .../protostr/test_multiplex_layer.protostr | 63 - .../configs/protostr/test_ntm_layers.protostr | 225 - .../tests/configs/protostr/test_pad.protostr | 122 - .../protostr/test_pooling3D_layer.protostr | 123 - .../protostr/test_prelu_layer.protostr | 144 - .../protostr/test_print_layer.protostr | 27 - .../protostr/test_recursive_topology.protostr | 593 -- .../protostr/test_repeat_layer.protostr | 42 - .../protostr/test_resize_layer.protostr | 27 - 
.../configs/protostr/test_rnn_group.protostr | 738 -- .../protostr/test_roi_pool_layer.protostr | 100 - .../configs/protostr/test_row_conv.protostr | 41 - .../protostr/test_row_l2_norm_layer.protostr | 27 - .../protostr/test_scale_shift_layer.protostr | 72 - .../test_scale_sub_region_layer.protostr | 51 - .../protostr/test_seq_concat_reshape.protostr | 51 - .../protostr/test_seq_slice_layer.protostr | 79 - .../protostr/test_sequence_pooling.protostr | 162 - .../configs/protostr/test_smooth_l1.protostr | 40 - .../protostr/test_split_datasource.protostr | 72 - .../configs/protostr/test_spp_layer.protostr | 40 - .../test_sub_nested_seq_select_layer.protostr | 37 - .../configs/protostr/unused_layers.protostr | 27 - .../configs/protostr/util_layers.protostr | 87 - .../tests/configs/run_tests.sh | 44 - .../tests/configs/shared_fc.py | 43 - .../tests/configs/shared_gru.py | 54 - .../tests/configs/shared_lstm.py | 56 - .../tests/configs/simple_rnn_layers.py | 51 - .../tests/configs/test_BatchNorm3D.py | 25 - .../tests/configs/test_bi_grumemory.py | 21 - .../tests/configs/test_bilinear_interp.py | 41 - .../tests/configs/test_clip_layer.py | 20 - .../test_config_parser_for_non_file_config.py | 51 - .../tests/configs/test_conv3d_layer.py | 63 - .../tests/configs/test_cost_layers.py | 61 - .../configs/test_cost_layers_with_weight.py | 33 - .../tests/configs/test_crop.py | 35 - .../configs/test_cross_entropy_over_beam.py | 45 - .../tests/configs/test_deconv3d_layer.py | 64 - .../configs/test_detection_output_layer.py | 37 - .../tests/configs/test_dot_prod_layer.py | 21 - .../tests/configs/test_expand_layer.py | 28 - .../configs/test_factorization_machine.py | 21 - .../tests/configs/test_fc.py | 30 - .../tests/configs/test_gated_unit_layer.py | 30 - .../tests/configs/test_grumemory_layer.py | 27 - .../tests/configs/test_hsigmoid.py | 22 - .../configs/test_kmax_seq_socre_layer.py | 9 - .../tests/configs/test_l2_distance_layer.py | 21 - .../tests/configs/test_lstmemory_layer.py | 27 - .../tests/configs/test_maxout.py | 56 - .../tests/configs/test_multibox_loss_layer.py | 39 - .../tests/configs/test_multiplex_layer.py | 26 - .../tests/configs/test_ntm_layers.py | 44 - .../tests/configs/test_pad.py | 34 - .../tests/configs/test_pooling3D_layer.py | 52 - .../tests/configs/test_prelu_layer.py | 24 - .../tests/configs/test_print_layer.py | 23 - .../tests/configs/test_recursive_topology.py | 30 - .../tests/configs/test_repeat_layer.py | 25 - .../tests/configs/test_resize_layer.py | 20 - .../tests/configs/test_rnn_group.py | 62 - .../tests/configs/test_roi_pool_layer.py | 37 - .../tests/configs/test_row_conv.py | 23 - .../tests/configs/test_row_l2_norm_layer.py | 20 - .../tests/configs/test_scale_shift_layer.py | 23 - .../configs/test_scale_sub_region_layer.py | 25 - .../tests/configs/test_seq_concat_reshape.py | 26 - .../tests/configs/test_seq_slice_layer.py | 13 - .../tests/configs/test_sequence_pooling.py | 43 - .../tests/configs/test_smooth_l1.py | 21 - .../tests/configs/test_split_datasource.py | 24 - .../tests/configs/test_spp_layer.py | 24 - .../test_sub_nested_seq_select_layer.py | 11 - .../tests/configs/unused_layers.py | 25 - .../tests/configs/util_layers.py | 27 - .../tests/layers_test.py | 20 - .../tests/layers_test_config.py | 86 - .../tests/test_reset_hook.py | 29 - python/paddle/trainer_config_helpers/utils.py | 33 - python/paddle/v2/__init__.py | 156 - python/paddle/v2/activation.py | 26 - python/paddle/v2/attr.py | 29 - python/paddle/v2/config_base.py | 68 - python/paddle/v2/data_feeder.py | 133 
- python/paddle/v2/data_type.py | 27 - python/paddle/v2/dataset/__init__.py | 46 - python/paddle/v2/dataset/cifar.py | 148 - python/paddle/v2/dataset/common.py | 236 - python/paddle/v2/dataset/conll05.py | 257 - python/paddle/v2/dataset/flowers.py | 218 - python/paddle/v2/dataset/imdb.py | 148 - python/paddle/v2/dataset/imikolov.py | 161 - python/paddle/v2/dataset/mnist.py | 129 - python/paddle/v2/dataset/movielens.py | 262 - python/paddle/v2/dataset/mq2007.py | 333 - python/paddle/v2/dataset/sentiment.py | 141 - python/paddle/v2/dataset/tests/cifar_test.py | 56 - python/paddle/v2/dataset/tests/common_test.py | 94 - .../paddle/v2/dataset/tests/flowers_test.py | 51 - python/paddle/v2/dataset/tests/imdb_test.py | 57 - .../paddle/v2/dataset/tests/imikolov_test.py | 67 - python/paddle/v2/dataset/tests/mnist_test.py | 44 - python/paddle/v2/dataset/tests/mq2007_test.py | 33 - .../paddle/v2/dataset/tests/test_sentiment.py | 55 - .../paddle/v2/dataset/tests/voc2012_test.py | 42 - python/paddle/v2/dataset/tests/wmt16_test.py | 66 - python/paddle/v2/dataset/uci_housing.py | 134 - python/paddle/v2/dataset/voc2012.py | 85 - python/paddle/v2/dataset/wmt14.py | 181 - python/paddle/v2/dataset/wmt16.py | 352 - python/paddle/v2/evaluator.py | 36 - python/paddle/v2/event.py | 113 - python/paddle/v2/image.py | 380 - python/paddle/v2/inference.py | 172 - python/paddle/v2/layer.py | 326 - python/paddle/v2/master/.gitignore | 3 - python/paddle/v2/master/__init__.py | 17 - python/paddle/v2/master/client.py | 95 - python/paddle/v2/minibatch.py | 43 - python/paddle/v2/networks.py | 33 - python/paddle/v2/op.py | 120 - python/paddle/v2/optimizer.py | 297 - python/paddle/v2/parameters.py | 441 - python/paddle/v2/plot/__init__.py | 17 - python/paddle/v2/plot/plot.py | 82 - python/paddle/v2/plot/tests/CMakeLists.txt | 5 - python/paddle/v2/plot/tests/__init__.py | 16 - python/paddle/v2/plot/tests/test_ploter.py | 40 - python/paddle/v2/pooling.py | 26 - python/paddle/v2/reader/__init__.py | 74 - python/paddle/v2/reader/creator.py | 130 - python/paddle/v2/reader/decorator.py | 405 - python/paddle/v2/reader/tests/CMakeLists.txt | 2 - python/paddle/v2/reader/tests/__init__.py | 13 - python/paddle/v2/reader/tests/creator_test.py | 74 - .../paddle/v2/reader/tests/decorator_test.py | 178 - .../v2/reader/tests/test_data_creator.txt | 3 - .../v2/reader/tests/test_reader_recordio.dat | Bin 76 -> 0 bytes .../v2/reader/tests/test_recordio_creator.dat | Bin 88 -> 0 bytes python/paddle/v2/tests/CMakeLists.txt | 8 - python/paddle/v2/tests/cat.jpg | Bin 57218 -> 0 bytes python/paddle/v2/tests/test_data_feeder.py | 267 - python/paddle/v2/tests/test_image.py | 43 - python/paddle/v2/tests/test_layer.py | 290 - python/paddle/v2/tests/test_op.py | 51 - .../paddle/v2/tests/test_paramconf_order.py | 99 - python/paddle/v2/tests/test_parameters.py | 143 - python/paddle/v2/tests/test_rnn_layer.py | 166 - python/paddle/v2/tests/test_topology.py | 85 - python/paddle/v2/topology.py | 145 - python/paddle/v2/trainer.py | 258 - 214 files changed, 37347 deletions(-) delete mode 100644 python/paddle/trainer/PyDataProvider2.py delete mode 100644 python/paddle/trainer/PyDataProviderWrapper.py delete mode 100644 python/paddle/trainer/__init__.py delete mode 100644 python/paddle/trainer/config_parser.py delete mode 100644 python/paddle/trainer/config_parser_extension.py delete mode 100644 python/paddle/trainer/recurrent_units.py delete mode 100644 python/paddle/trainer_config_helpers/__init__.py delete mode 100644 
python/paddle/trainer_config_helpers/activations.py delete mode 100644 python/paddle/trainer_config_helpers/attrs.py delete mode 100644 python/paddle/trainer_config_helpers/config_parser_utils.py delete mode 100644 python/paddle/trainer_config_helpers/data_sources.py delete mode 100644 python/paddle/trainer_config_helpers/default_decorators.py delete mode 100644 python/paddle/trainer_config_helpers/evaluators.py delete mode 100644 python/paddle/trainer_config_helpers/layer_math.py delete mode 100644 python/paddle/trainer_config_helpers/layers.py delete mode 100644 python/paddle/trainer_config_helpers/networks.py delete mode 100644 python/paddle/trainer_config_helpers/optimizers.py delete mode 100644 python/paddle/trainer_config_helpers/poolings.py delete mode 100644 python/paddle/trainer_config_helpers/tests/CMakeLists.txt delete mode 100644 python/paddle/trainer_config_helpers/tests/ProtobufEqualMain.cpp delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/.gitignore delete mode 100755 python/paddle/trainer_config_helpers/tests/configs/file_list.sh delete mode 100755 python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/img_layers.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/layer_activations.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/math_ops.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/projections.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr delete mode 100644 
python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_multiplex_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_resize_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_sub_region_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr 
delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_sub_nested_seq_select_layer.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/unused_layers.protostr delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr delete mode 100755 python/paddle/trainer_config_helpers/tests/configs/run_tests.sh delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/shared_fc.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/shared_gru.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_crop.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_fc.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_maxout.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py delete mode 100644 
python/paddle/trainer_config_helpers/tests/configs/test_pad.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/unused_layers.py delete mode 100644 python/paddle/trainer_config_helpers/tests/configs/util_layers.py delete mode 100644 python/paddle/trainer_config_helpers/tests/layers_test.py delete mode 100644 python/paddle/trainer_config_helpers/tests/layers_test_config.py delete mode 100644 python/paddle/trainer_config_helpers/tests/test_reset_hook.py delete mode 100644 python/paddle/trainer_config_helpers/utils.py delete mode 100644 python/paddle/v2/__init__.py delete mode 100644 python/paddle/v2/activation.py delete mode 100644 python/paddle/v2/attr.py delete mode 100644 python/paddle/v2/config_base.py delete mode 100644 python/paddle/v2/data_feeder.py delete mode 100644 python/paddle/v2/data_type.py delete mode 100644 python/paddle/v2/dataset/__init__.py delete mode 100644 python/paddle/v2/dataset/cifar.py delete mode 100644 python/paddle/v2/dataset/common.py delete mode 100644 python/paddle/v2/dataset/conll05.py delete mode 100644 python/paddle/v2/dataset/flowers.py delete mode 100644 python/paddle/v2/dataset/imdb.py delete mode 100644 python/paddle/v2/dataset/imikolov.py delete mode 100644 python/paddle/v2/dataset/mnist.py delete mode 100644 python/paddle/v2/dataset/movielens.py delete mode 100644 python/paddle/v2/dataset/mq2007.py delete mode 100644 python/paddle/v2/dataset/sentiment.py delete mode 100644 python/paddle/v2/dataset/tests/cifar_test.py delete mode 100644 python/paddle/v2/dataset/tests/common_test.py delete mode 100644 python/paddle/v2/dataset/tests/flowers_test.py delete mode 100644 python/paddle/v2/dataset/tests/imdb_test.py delete mode 100644 python/paddle/v2/dataset/tests/imikolov_test.py delete mode 
100644 python/paddle/v2/dataset/tests/mnist_test.py delete mode 100644 python/paddle/v2/dataset/tests/mq2007_test.py delete mode 100644 python/paddle/v2/dataset/tests/test_sentiment.py delete mode 100644 python/paddle/v2/dataset/tests/voc2012_test.py delete mode 100644 python/paddle/v2/dataset/tests/wmt16_test.py delete mode 100644 python/paddle/v2/dataset/uci_housing.py delete mode 100644 python/paddle/v2/dataset/voc2012.py delete mode 100644 python/paddle/v2/dataset/wmt14.py delete mode 100644 python/paddle/v2/dataset/wmt16.py delete mode 100644 python/paddle/v2/evaluator.py delete mode 100644 python/paddle/v2/event.py delete mode 100644 python/paddle/v2/image.py delete mode 100644 python/paddle/v2/inference.py delete mode 100644 python/paddle/v2/layer.py delete mode 100644 python/paddle/v2/master/.gitignore delete mode 100644 python/paddle/v2/master/__init__.py delete mode 100644 python/paddle/v2/master/client.py delete mode 100644 python/paddle/v2/minibatch.py delete mode 100644 python/paddle/v2/networks.py delete mode 100644 python/paddle/v2/op.py delete mode 100644 python/paddle/v2/optimizer.py delete mode 100644 python/paddle/v2/parameters.py delete mode 100644 python/paddle/v2/plot/__init__.py delete mode 100644 python/paddle/v2/plot/plot.py delete mode 100644 python/paddle/v2/plot/tests/CMakeLists.txt delete mode 100644 python/paddle/v2/plot/tests/__init__.py delete mode 100644 python/paddle/v2/plot/tests/test_ploter.py delete mode 100644 python/paddle/v2/pooling.py delete mode 100644 python/paddle/v2/reader/__init__.py delete mode 100644 python/paddle/v2/reader/creator.py delete mode 100644 python/paddle/v2/reader/decorator.py delete mode 100644 python/paddle/v2/reader/tests/CMakeLists.txt delete mode 100644 python/paddle/v2/reader/tests/__init__.py delete mode 100644 python/paddle/v2/reader/tests/creator_test.py delete mode 100644 python/paddle/v2/reader/tests/decorator_test.py delete mode 100644 python/paddle/v2/reader/tests/test_data_creator.txt delete mode 100644 python/paddle/v2/reader/tests/test_reader_recordio.dat delete mode 100644 python/paddle/v2/reader/tests/test_recordio_creator.dat delete mode 100644 python/paddle/v2/tests/CMakeLists.txt delete mode 100644 python/paddle/v2/tests/cat.jpg delete mode 100644 python/paddle/v2/tests/test_data_feeder.py delete mode 100644 python/paddle/v2/tests/test_image.py delete mode 100644 python/paddle/v2/tests/test_layer.py delete mode 100644 python/paddle/v2/tests/test_op.py delete mode 100644 python/paddle/v2/tests/test_paramconf_order.py delete mode 100644 python/paddle/v2/tests/test_parameters.py delete mode 100644 python/paddle/v2/tests/test_rnn_layer.py delete mode 100644 python/paddle/v2/tests/test_topology.py delete mode 100644 python/paddle/v2/topology.py delete mode 100644 python/paddle/v2/trainer.py diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py deleted file mode 100644 index 05635833bf..0000000000 --- a/python/paddle/trainer/PyDataProvider2.py +++ /dev/null @@ -1,541 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import cPickle
-import logging
-import collections
-import functools
-import itertools
-
-logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
-                    " %(message)s")
-
-
-class SequenceType(object):
-    NO_SEQUENCE = 0
-    SEQUENCE = 1
-    SUB_SEQUENCE = 2
-
-    @classmethod
-    def tostring(cls, value):
-        for k in cls.__dict__:
-            if not k.startswith('__'):
-                if getattr(cls, k) == value:
-                    return cls.__name__ + '.' + k
-        return 'INVALID(' + str(value) + ')'
-
-
-# TODO(yuyang18): Add string data type here.
-class DataType(object):
-    Dense = 0
-    SparseNonValue = 1
-    SparseValue = 2
-    Index = 3
-
-    @classmethod
-    def tostring(cls, value):
-        for k in cls.__dict__:
-            if not k.startswith('__'):
-                if getattr(cls, k) == value:
-                    return cls.__name__ + '.' + k
-        return 'INVALID(' + str(value) + ')'
-
-
-class CacheType(object):
-    NO_CACHE = 0  # No cache at all
-
-    # During the first pass, read data from Python and store it in memory;
-    # read from memory during the remaining passes.
-    CACHE_PASS_IN_MEM = 1
-
-
-class InputType(object):
-    """
-    InputType is the base class for paddle input types.
-
-    .. note::
-
-        This is a base class and should never be used directly by users.
-
-    :param dim: dimension of input. If the input is an integer, it means the
-                value range. Otherwise, it means the size of the layer.
-    :type dim: int
-    :param seq_type: sequence type of input. 0 means it is not a sequence. 1
-                     means it is a variable-length sequence. 2 means it is a
-                     nested sequence.
-    :type seq_type: int
-    :param type: data type of input.
-    :type type: int
-    """
-    __slots__ = ['dim', 'seq_type', 'type']
-
-    def __init__(self, dim, seq_type, tp):
-        self.dim = dim
-        self.seq_type = seq_type
-        self.type = tp
-
-    def __repr__(self):
-        """
-        Return a human readable representation like 'InputType(dim=25921,
-        seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)'
-        """
-        repr_str = type(self).__name__
-        repr_str += '('
-        serialize_func_map = {
-            'dim': repr,
-            'seq_type': SequenceType.tostring,
-            'type': DataType.tostring
-        }
-        for idx, k in enumerate(self.__slots__):
-            if idx != 0:
-                repr_str += ', '
-            repr_str += (
-                k + '=' + serialize_func_map.get(k, repr)(getattr(self, k)))
-        repr_str += ')'
-        return repr_str
-
-
-def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
-    """
-    Dense array. It means the input feature is a dense array of floats.
-    For example, if the input is an image with 28*28 pixels, the input to the
-    Paddle neural network can be a dense vector with dimension 784 or a
-    numpy array with shape (28, 28).
-
-    For the 2-D convolution operation, every sample within one mini-batch must
-    currently have the same size in PaddlePaddle. However, variable-dimension
-    features across mini-batches are supported. In the variable-dimension
-    case, the param dim is not used; the data reader must yield numpy arrays,
-    and the data feeder will set the data shape correctly.
-
-    :param dim: dimension of this vector.
-    :type dim: int
-    :param seq_type: sequence type of input.
-    :type seq_type: int
-    :return: An input type object.
-    :rtype: InputType
-    """
-    return InputType(dim, seq_type, DataType.Dense)
-
-
-def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
-    """
-    Sparse binary vector. It means the input feature is a sparse vector and
-    every element in this vector is either zero or one.
-
-    :param dim: dimension of this vector.
-    :type dim: int
-    :param seq_type: sequence type of this input.
-    :type seq_type: int
-    :return: An input type object.
-    :rtype: InputType
-    """
-    return InputType(dim, seq_type, DataType.SparseNonValue)
-
-
-def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
-    """
-    Sparse vector. It means the input feature is a sparse vector. Most of the
-    elements in this vector are zero; the others can be any float value.
-
-    :param dim: dimension of this vector.
-    :type dim: int
-    :param seq_type: sequence type of this input.
-    :type seq_type: int
-    :return: An input type object.
-    :rtype: InputType
-    """
-    return InputType(dim, seq_type, DataType.SparseValue)
-
-
-def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
-    """
-    Data type of an integer.
-
-    :param seq_type: sequence type of this input.
-    :type seq_type: int
-    :param value_range: range of this integer.
-    :type value_range: int
-    :return: An input type object
-    :rtype: InputType
-    """
-    return InputType(value_range, seq_type, DataType.Index)
-
-
-dense_vector = dense_slot
-sparse_binary_vector = sparse_non_value_slot
-sparse_float_vector = sparse_value_slot
-integer_value = index_slot
-
-# dense_array can be used for variable-length input features.
-# Each feature is not a vector, but a multi-dimensional array.
-dense_array = dense_slot
-
-
-def dense_vector_sequence(dim):
-    """
-    Data type of a sequence of dense vectors.
-
-    :param dim: dimension of each dense vector.
-    :type dim: int
-    :return: An input type object
-    :rtype: InputType
-    """
-    return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
-
-
-def dense_vector_sub_sequence(dim):
-    return dense_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
-
-
-def sparse_binary_vector_sequence(dim):
-    """
-    Data type of a sequence of sparse vectors, in which every element is
-    either zero or one.
-
-    :param dim: dimension of each sparse vector.
-    :type dim: int
-    :return: An input type object
-    :rtype: InputType
-    """
-    return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
-
-
-def sparse_binary_vector_sub_sequence(dim):
-    return sparse_binary_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
-
-
-def sparse_float_vector_sequence(dim):
-    """
-    Data type of a sequence of sparse vectors, in which most elements are
-    zero and the others can be any float value.
-
-    :param dim: dimension of each sparse vector.
-    :type dim: int
-    :return: An input type object
-    :rtype: InputType
-    """
-    return sparse_float_vector(dim, seq_type=SequenceType.SEQUENCE)
-
-
-def sparse_float_vector_sub_sequence(dim):
-    return sparse_float_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
-
-
-def integer_value_sequence(value_range):
-    """
-    Data type of a sequence of integers.
-
-    :param value_range: range of each element.
- :type value_range: int - """ - return integer_value(value_range, seq_type=SequenceType.SEQUENCE) - - -def integer_value_sub_sequence(dim): - return integer_value(dim, seq_type=SequenceType.SUB_SEQUENCE) - - -integer_sequence = integer_value_sequence - - -class SingleSlotWrapper(object): - def __init__(self, generator): - self.generator = generator - - def __call__(self, obj, filename): - for item in self.generator(obj, filename): - if isinstance(item, dict): - yield item - else: - yield [item] - - -class InputOrderWrapper(object): - def __init__(self, generator, input_order): - self.generator = generator - self.input_order = input_order - - def __call__(self, obj, filename): - for item in self.generator(obj, filename): - if isinstance(item, dict): - yield [ - item.get(input_name, None) - for input_name in self.input_order - ] - else: - yield item - - -class CheckWrapper(object): - def __init__(self, generator, input_types, check_fail_continue, logger): - self.generator = generator - self.input_types = input_types - self.check_fail_continue = check_fail_continue - self.logger = logger - - def __call__(self, obj, filename): - for items in self.generator(obj, filename): - try: - assert len(items) == len(self.input_types) - assert len(filter(lambda x: x is None, items)) == 0 - for item, input_type in itertools.izip(items, self.input_types): - callback = functools.partial(CheckWrapper.loop_callback, - input_type) - - for _ in xrange(input_type.seq_type): - callback = functools.partial(CheckWrapper.loop_check, - callback) - callback(item) - - yield items - except AssertionError as e: - self.logger.warning( - "Item (%s) is not fit the input type with error %s" % - (repr(item), repr(e))) - - if self.check_fail_continue: - continue - else: - raise - - @staticmethod - def loop_callback(input_type, each): - assert isinstance(input_type, InputType) - if input_type.type == DataType.Dense: - assert isinstance(each, collections.Sequence) - for d in each: - assert isinstance(d, float) - assert len(each) == input_type.dim - elif input_type.type == DataType.Index: - assert isinstance(each, int) - assert each < input_type.dim - elif input_type.type == DataType.SparseNonValue \ - or input_type.type == DataType.SparseValue: - assert isinstance(each, collections.Sequence) - sparse_id = set() - for k in each: - if input_type.type == DataType.SparseValue: - k, v = k - assert isinstance(v, float) - assert isinstance(k, int) - assert k < input_type.dim - sparse_id.add(k) - assert len(sparse_id) == len(each) - else: - raise RuntimeError("Not support input type") - - @staticmethod - def loop_check(callback, item): - for each in item: - callback(each) - - -class CheckInputTypeWrapper(object): - def __init__(self, generator, input_types, logger): - self.generator = generator - self.input_types = input_types - self.logger = logger - - def __call__(self, obj, filename): - for items in self.generator(obj, filename): - try: - # dict type is required for input_types when item is dict type - assert (isinstance(items, dict) and \ - not isinstance(self.input_types, dict))==False - yield items - except AssertionError as e: - self.logger.error( - "%s type is required for input type but got %s" % - (repr(type(items)), repr(type(self.input_types)))) - raise - - -def provider(input_types=None, - should_shuffle=None, - pool_size=-1, - min_pool_size=-1, - can_over_batch_size=True, - calc_batch_size=None, - cache=CacheType.NO_CACHE, - check=False, - check_fail_continue=False, - init_hook=None, - **outter_kwargs): - """ - Provider 
decorator. Use it to turn a function into a PyDataProvider2 object.
-    Inside this function, the user only needs to read samples one by one
-    from the given train/test file.
-
-    The basic usage is:
-
-    .. code-block:: python
-
-        @provider(some data provider config here...)
-        def process(settings, file_name):
-            while not at end of file_name:
-                sample = readOneSampleFromFile(file_name)
-                yield sample
-
-    The configuration of the data provider is set up by\:
-
-    :param input_types: Specify the input types; can also be set in init_hook.
-                        It can be a list of InputType objects, e.g.
-                        input_types=[dense_vector(9), integer_value(2)], or a
-                        dict of InputType objects keyed by data_layer name,
-                        e.g. input_types=\
-                        {'img': img_features, 'label': label}. When a dict of
-                        InputType is used, the function may yield a dict of
-                        feature values, also keyed by data_layer name.
-
-    :type input_types: list|tuple|dict
-
-    :param should_shuffle: True if the data should be shuffled. Passing None
-                           means shuffle during training and do not shuffle
-                           during testing.
-    :type should_shuffle: bool
-
-    :param pool_size: Maximum number of samples in the data pool.
-    :type pool_size: int
-
-    :param min_pool_size: Minimum number of samples kept in the data pool.
-                          PaddlePaddle picks samples from the pool at random,
-                          so min_pool_size affects how well the data is
-                          randomized.
-    :type min_pool_size: int
-
-    :param can_over_batch_size: True if PaddlePaddle may return a mini-batch
-                                larger than the batch size in settings. This
-                                is useful when each sample's batch size is
-                                computed by a custom calc_batch_size.
-
-                                Setting it to False while also using
-                                calc_batch_size is dangerous. Default is True.
-    :type can_over_batch_size: bool
-
-    :param calc_batch_size: a method to calculate each sample's batch size.
-                            By default every sample has a batch size of 1,
-                            but it can be customized per sample.
-    :type calc_batch_size: callable
-
-    :param cache: Cache strategy of the data provider. Default is
-                  CacheType.NO_CACHE.
-    :type cache: int
-
-    :param init_hook: Initialization hook, useful when the data provider
-                      needs to load external data such as a dictionary. The
-                      parameters are (settings, file_list, \*\*kwargs).
-
-                      - settings. The global settings object; the user can
-                        set settings.input_types here.
-                      - file_list. All file names passed to the data provider.
-                      - is_train. Whether this data provider is used for
-                        training.
-                      - kwargs. Other keyword arguments passed from
-                        trainer_config's args parameter.
-    :type init_hook: callable
-
-    :param check: Check that the yielded data format matches input_types.
-                  Enabling this slows down the data providing process but is
-                  very useful for debugging. Default is disabled.
-    :type check: bool
-
-    :param check_fail_continue: Whether to continue training when a check
-                                fails. When True, wrongly formatted data is
-                                simply dropped. Has no effect when check is
-                                set to False.
- :type check_fail_continue: bool - """ - - def __wrapper__(generator): - class DataProvider(object): - def __init__(self, file_list, **kwargs): - self.logger = logging.getLogger("") - self.logger.setLevel(logging.INFO) - self.input_types = None - self.should_shuffle = should_shuffle - - true_table = [1, 't', 'true', 'on'] - false_table = [0, 'f', 'false', 'off'] - if not isinstance(self.should_shuffle, bool) and \ - self.should_shuffle is not None: - - if isinstance(self.should_shuffle, basestring): - self.should_shuffle = self.should_shuffle.lower() - - if self.should_shuffle in true_table: - self.should_shuffle = True - elif self.should_shuffle in false_table: - self.should_shuffle = False - else: - self.logger.warning( - "Could not recognize should_shuffle (%s), " - "just use default value of should_shuffle." - " Please set should_shuffle to bool value or " - "something in %s" % - (repr(self.should_shuffle), - repr(true_table + false_table))) - self.should_shuffle = None - - self.pool_size = pool_size - self.can_over_batch_size = can_over_batch_size - self.calc_batch_size = calc_batch_size - self.file_list = file_list - self.generator = generator - self.cache = cache - self.min_pool_size = min_pool_size - self.input_order = kwargs['input_order'] - self.check = check - if init_hook is not None: - init_hook(self, file_list=file_list, **kwargs) - - if 'slots' in outter_kwargs: - self.logger.warning('setting slots value is deprecated, ' - 'please use input_types instead.') - self.slots = outter_kwargs['slots'] - if input_types is not None: - self.slots = input_types - - if self.input_types is not None: - self.slots = self.input_types - - assert self.slots is not None, \ - "Data Provider's input_types must be set" - assert self.generator is not None - - use_dynamic_order = False - if isinstance(self.slots, dict): # reorder input_types - self.slots = [self.slots[ipt] for ipt in self.input_order] - use_dynamic_order = True - - if len(self.slots) == 1: - self.generator = SingleSlotWrapper(self.generator) - - if use_dynamic_order: - self.generator = InputOrderWrapper(self.generator, - self.input_order) - else: - self.generator = CheckInputTypeWrapper( - self.generator, self.slots, self.logger) - if self.check: - self.generator = CheckWrapper(self.generator, self.slots, - check_fail_continue, - self.logger) - - return DataProvider - - return __wrapper__ - - -def deserialize_args(args): - """ - Internal use only. - :param args: - :return: - """ - return cPickle.loads(args) diff --git a/python/paddle/trainer/PyDataProviderWrapper.py b/python/paddle/trainer/PyDataProviderWrapper.py deleted file mode 100644 index 374976db9f..0000000000 --- a/python/paddle/trainer/PyDataProviderWrapper.py +++ /dev/null @@ -1,749 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This module provide a wrapper(decorator) to wrap a data process method into a -PyDataProvider. Some examples are shown `here `_. 
-""" - -import struct -import array -import random -import gc -import logging -import pstats -import sys -import numpy -import functools - -__all__ = [ - 'DenseSlot', 'SlotType', 'SparseNonValueSlot', 'StringSlot', - 'SparseValueSlot', 'IndexSlot', 'PoolSize', 'GeneralPyDataProvider', - 'provider', 'init_hook_wrapper' -] - -try: # Just for profile mode, will try to import cProfile first. - # Most python will contains cProfile, cProfile/profile are basically same. - # ref: https://docs.python.org/2/library/profile.html#introduction-to-the-profilers - import cProfile as profile -except ImportError: - import profile - -try: - import cPickle as pickle -except ImportError: - import six.moves.cPickle as pickle - -import io - - -class SlotType(object): # Just a hint for user. - pass - - -class DenseSlot(SlotType): - """ - Dense Slot Type: Each item is the value of a Dense Vector. - - Its yield format for :code:`provider` is: - - - **NonSeq**: [float, float, ... ] - - **Seq**: [[float, float, ...], [float, float ....], ... ] - - **SubSeq**: [[[float, float, ...], [float ....], ...] , \ - [[float, float, ...], [float ....], ...] , ...] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: int - """ - self.dim = dim - self.type = 0 - - -class SparseNonValueSlot(SlotType): - """ - Sparse NonValue Slot Type: Each item is the id of a Sparse Vector. - - Its yield format for :code:`provider` is: - - - **NonSeq**: [int, int, ...] - - **Seq**: [[int, int, ...], [int, int, ...], ... ] - - **SubSeq**: [[[int, int, ...], [int, ....], ...] , \ - [[int, int, ...], [int, ....], ...] , ...] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: int - """ - self.dim = dim - self.type = 1 - - -class SparseValueSlot(SlotType): - """ - Sparse Value Slot Type: Each item is the id and value of a Sparse Vector. - - Its yield format for :code:`provider` is: - - - **NonSeq**: [(int, float), (int, float), ... ] - - **Seq**: [[(int,float), (int, float), ... ], \ - [(int, float), (int, float), ...], ... ] - - **SubSeq**: [[[(int,float), ...], [(int, float), ....], ...] , \ - [[(int,float), ...], [(int, float), ....], ...] , ...] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension. - :type dim: int - """ - self.dim = dim - self.type = 2 - - -class IndexSlot(SlotType): - """ - Index Value Slot Type: Each item is the id of Label. - - Its yield format for :code:`provider` is: - - - **NonSeq**: int - - **Seq**: [int, int, ....] - - **SubSeq**: [[int, int, ...], [int, int, ...], ... ] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: int - """ - self.dim = dim - self.type = 3 - - -class StringSlot(SlotType): - """ - String Value Slot Type: Each item is a string for printout, \ - can be used in DataLayer too. - - Its yield format for :code:`provider` is: - - - **NonSeq**: string - - **Seq**: [string, string, ....] - - **SubSeq**: [[string, string, ...], [string, string, ...], ... ] - """ - - def __init__(self, dim): - """ - :param dim: slot dimension - :type dim: string - """ - self.dim = dim - self.type = 6 - - -class SparseNonValueHandler(object): - """ - Private Class, Use for converting python object to paddle string. - """ - - def __init__(self): - self.offsets = [] - self.value = [] - self.offset_count = 0 - - def __call__(self, ele): - """ - It will be invoked when scan each sparse data. - - :param ele: list of sparse data, maybe non-value [ idx, ... ] or value. - [ (idx, val), ... 
] - :type ele: list - """ - self.offsets.append(self.offset_count) - self.offset_count += len(ele) - self.processElement(ele) - - def processElement(self, ele): - """ - Process for element list. See __call__ for more document. - """ - self.value += ele - - def done(self, data_stream, int_packer): - """ - Dump data to stream. - :param data_stream: Output Stream. - :param int_packer: A struct.Struct("i") object - """ - data_stream.write(array.array("i", self.offsets).tostring()) - data_stream.write(int_packer.pack(self.offset_count)) - data_stream.write(array.array("i", self.value).tostring()) - - -class SparseValueHandler(SparseNonValueHandler): - """ - Private class, use for converting python obj to paddle string. - """ - - def __init__(self): - SparseNonValueHandler.__init__(self) - self.weight = [] - - def processElement(self, ele): - for idx, w in ele: - self.value.append(idx) - self.weight.append(w) - - def done(self, data_stream, int_packer): - SparseNonValueHandler.done(self, data_stream, int_packer) - data_stream.write(int_packer.pack(self.offset_count)) - data_stream.write(array.array("f", self.weight).tostring()) - - -class StringHandler(object): - """ - Private Class, Use for converting python object to paddle string. - """ - - def __init__(self, data_stream, int_packer): - self.data_stream = data_stream - self.int_packer = int_packer - - def __call__(self, ele): - """ - It will be invoked when scan each string data. - :param ele: string data - :type ele: str - """ - self.data_stream.write(self.int_packer.pack(len(ele))) - self.data_stream.write(array.array("c", ele).tostring()) - - -class GeneralPyDataProvider: - def __init__(self, *file_list, **kwargs): - """ - :param file_list: input file_list - """ - del kwargs # unused - gc.disable() - assert isinstance(self.logger, logging.Logger) - self.use_seq_flag = hasattr(self, "use_seq_flag") and self.use_seq_flag - self.slots_num = len(self.getSlots()) - self.file_list = list(file_list) - self.generators = map(self.generateData, self.file_list) - self.int_packer = struct.Struct("i") - self.head_packer = struct.Struct("ii") - self.float_packer = struct.Struct("f") - self.shuffler = lambda *args, **kwargs: None - self.data_pool = [] - self.has_subseq = [] - self.has_checked = False - - self.debug = hasattr(self, "debug") and self.debug - - if hasattr(self, "profile_filename") and isinstance( - self.profile_filename, str): - self.profile_count = 0 - self.is_profile = True - else: - self.is_profile = False - - if not hasattr(self, "file_count") or not isinstance(self.file_count, - int): - self.file_count = sys.maxint - - if not hasattr(self, "can_over_batch_size"): - self.can_over_batch_size = True - elif not self.can_over_batch_size: - self.logger.warn( - "User should ensure every data size is not larger than batch" - " size when can_over_batch_size = False") - - self.data_pool_idx = 0 - - def reset(self): - """Reset all data in provider.""" - - self.logger.debug("reset dataprovider.") - self.generators = map(self.generateData, self.file_list) - self.shuffler = lambda *args, **kwargs: None - self.data_pool = [] - self.data_pool_idx = 0 - if self.file_count != 0: - self.max_pool_size = 0 - - # When use Profile, each pass will print a profile result. 
- if self.is_profile: - if hasattr(self, "profiler") and isinstance(self.profiler, - profile.Profile): - self.profiler.disable() - fn = "%s_%d" % (self.profile_filename, self.profile_count) - sortby = "cumulative" - with open(fn, "w") as f: - pstats.Stats( - self.profiler, - stream=f).sort_stats(sortby).print_stats() - self.logger.info("saving profile to file %s" % fn) - self.profile_count += 1 - self.logger.info("resetting profile") - self.profiler = profile.Profile() - self.profiler.enable() - - def shuffle(self): - """ shuffle data""" - if not self.should_shuffle: - return - else: - self.logger.debug("shuffling data.") - random.shuffle(self.generators) - self.shuffler = random.shuffle - - def getSlots(self): - """ - :return : return a list of SlotType - :rtype: list - """ - return [] - - def generateData(self, fn): - """ - :param fn: file name - :return: a generator to yield data one by one. - """ - raise NotImplementedError - - def calculateDataBatchSize(self, data): - """ - :param data: One sample which yield by generateData - :type data: list - :return: The batch size that the data contribute. - :rtype: int - """ - return 1 - - def getHeader(self): - """return paddle header format""" - ret = self.head_packer.pack(self.slots_num, self.use_seq_flag) - for obj in self.getSlots(): - ret += self.head_packer.pack(obj.type, obj.dim) - return ret - - def getHeaderNative(self): - return self.use_seq_flag, self.getSlots() - - def getNextBatchNative(self, batch_size): - ret_list = [] - self.__prepareData(batch_size, ret_list) - return ret_list - - def getNextBatch(self, batch_size): - """ - :param batch_size: the batch_size approximately return. - :return: return paddle pyDataProvider format, just see documents. - :rtype: str - - NOTE: If can_over_batch_size is True, the return batch_size >= input batch_size. - Otherwise, the return batch_size < input batch_size, BUT USER MUST ENSURE THAT each data's batch size - is less than input batch_size. - """ - ret_list = [] - current_batch_size = self.__prepareData(batch_size, ret_list) - # create unified format for ret_list with differnt slots_num - if self.slots_num == 1: - ret_list = [ret_list] - - if current_batch_size == 0: - return self.int_packer.pack(current_batch_size) - data_bytes = io.BytesIO() - seq_bytes = io.BytesIO() - subseq_bytes = io.BytesIO() - data_stream = io.BufferedWriter(data_bytes) - seq_stream = io.BufferedWriter(seq_bytes) - subseq_stream = io.BufferedWriter(subseq_bytes) - - def convertDataImpl(idx, data_callback): - """ - This method will handle sequence in return data. invoke data_callback one by one. - :param idx: the slot index. - :param data_callback: a callback, which type is (each sample) => None. 
- """ - indices = 0 - slot_sample_num = len(ret_list) - if self.use_seq_flag: - slot_sample_num = 0 - if self.has_subseq[idx]: # has sub-sequence - slot_subseq_num = 0 - for dat in ret_list: - dat = dat[idx] - slot_subseq_num += len(dat) - for sub_dat in dat: - slot_sample_num += len(sub_dat) - subseq_stream.write(self.int_packer.pack(slot_subseq_num)) - else: - for dat in ret_list: - dat = dat[idx] - slot_sample_num += len(dat) - seq_stream.write(self.int_packer.pack(len(ret_list))) - data_stream.write(self.int_packer.pack(slot_sample_num)) - - for dat in ret_list: - dat = dat[idx] - if self.use_seq_flag: - seq_stream.write(self.int_packer.pack(indices)) - if self.has_subseq[idx]: # has sub-sequence - for sub_dat in dat: - writeDataStream(sub_dat, data_callback) - subseq_stream.write(self.int_packer.pack(indices)) - indices += len(sub_dat) - else: - writeDataStream(dat, data_callback) - indices += len(dat) - else: - writeDataStream(dat, data_callback) - - def writeDataStream(dat, data_callback): - if self.use_seq_flag > 0: - if data_callback is None: # Special for index slot - data_stream.write(array.array("i", dat).tostring()) - else: - for ele in dat: - data_callback(ele) - else: - if data_callback is None: # Special for index slot - data_stream.write(self.int_packer.pack(dat)) - else: - data_callback(dat) - - try: - for i in range(self.slots_num): - slot = self.getSlots()[i] - # According to the data_type, each slot data will be converted to binary - if isinstance(slot, DenseSlot): - convertDataImpl(i, lambda e: data_stream.write( - array.array("f", e).tostring())) - elif isinstance(slot, SparseNonValueSlot): - handler = SparseNonValueHandler() - convertDataImpl(i, handler) - handler.done(data_stream, self.int_packer) - elif isinstance(slot, SparseValueSlot): - handler = SparseValueHandler() - convertDataImpl(i, handler) - handler.done(data_stream, self.int_packer) - elif isinstance(slot, IndexSlot): - convertDataImpl(i, None) - elif isinstance(slot, StringSlot): - handler = StringHandler(data_stream, self.int_packer) - convertDataImpl(i, handler) - else: - raise RuntimeError("The data_type must be 0/1/2/3/6") - data_stream.flush() - seq_stream.flush() - subseq_stream.flush() - - return "".join([ - self.int_packer.pack(current_batch_size), data_bytes.getvalue(), - seq_bytes.getvalue(), subseq_bytes.getvalue() - ]) - - finally: - data_stream.close() - seq_stream.close() - subseq_stream.close() - data_bytes.close() - seq_bytes.close() - subseq_bytes.close() - - def hasSubseq(self, ret_list): - # create unified format for ret_list with differnt slots_num - if self.slots_num == 1: - ret_list = [ret_list] - # decide whether slot has sub-sequence using its first sample - for i in range(self.slots_num): - slot = self.getSlots()[i] - dat = ret_list[0][i][0] - if isinstance(slot, IndexSlot) or isinstance(slot, StringSlot): - if isinstance(dat, list) or isinstance(dat, numpy.ndarray): - self.has_subseq.append(1) # has_subseq = True - continue - elif isinstance(dat[0], list) or isinstance(dat[0], numpy.ndarray): - self.has_subseq.append(1) # has_subseq = True - continue - self.has_subseq.append(0) # has_subseq = False - - def checkOrder(self): - first_noSubseq_slot = self.slots_num - last_subseq_slot = -1 - for i in range(self.slots_num): - if not self.has_subseq[i]: - first_noSubseq_slot = i - break - for i in range(self.slots_num): - if self.has_subseq[i]: - last_subseq_slot = i - if first_noSubseq_slot < last_subseq_slot: - raise RuntimeError( - "slot hasSubseq must put before than slot 
without subseq") - self.has_checked = True - - def __prepareData(self, batch_size, ret_list): - current_batch_size = 0 - could_exit = False - while not could_exit: - if len(self.data_pool) == 0: - self.data_pool_idx = 0 - self.fillPool() - if len(self.data_pool) != 0: - for idx in xrange(self.data_pool_idx, len(self.data_pool)): - current_batch_size += self.calculateDataBatchSize( - self.data_pool[idx]) - if current_batch_size >= batch_size: - could_exit = True - break - if current_batch_size > batch_size and not self.can_over_batch_size: # if cannot over batch size - current_batch_size -= self.calculateDataBatchSize( - self.data_pool[idx]) - idx -= 1 - - ret_list += self.data_pool[self.data_pool_idx:idx + 1] - - # for speed reason, just shift left index, not delete data actually. - self.data_pool_idx = idx + 1 - - if self.data_pool_idx == len(self.data_pool): - self.data_pool = [] - else: - break - if self.use_seq_flag and not self.has_checked: # compute self.has_subseq and checkOrder only at first time - self.hasSubseq(ret_list) - self.checkOrder() - return current_batch_size - - def fillPool(self): - """ - Fill the pool to max_pool_size. If max_pool_size is None, then read file_count to pool. - """ - if self.max_pool_size == 0: - for i in xrange(min(self.file_count, len(self.generators))): - self.data_pool += list(self.generators[i]) - self.generators = self.generators[min(self.file_count, - len(self.generators)):] - self.max_pool_size = len(self.data_pool) - else: - while len(self.data_pool) < self.max_pool_size and len( - self.generators) != 0: - try: - self.data_pool.append(self.generators[0].next()) - except StopIteration: - self.generators.pop(0) - self.shuffler(self.data_pool) - - -class PoolSize(object): - """Max number of sample which contains in provider.""" - - def __init__(self, pool_size): - self.size = pool_size - - -def default_init_hook(cls, *args, **kwargs): - """ default hook, do nothing """ - del cls, args, kwargs - - -def provider(slots=None, - use_seq=False, - should_shuffle=True, - pool_size=1, - can_over_batch_size=True, - calc_batch_size=lambda data: 1, - debug=False, - init_hook=default_init_hook, - profile_filename=None): - """ - The decorator for PyDataProvider. User should use this to create Provider class. - User should only concern how to read sample from file. - - So the basic usage is: - - .. code-block:: python - - @provider(some data provider config here...) - def process(obj, file_name): - while not at end of file_name: - sample = readOneSampleFromFile(file_name) - yield sample. - - The configuration of data provider should be setup by: - - :param init_hook: A callback will be invoked when PyDataProvider instance \ - created. The parameter is (obj, \*args, \*\*kwargs). - - - **obj**: actually data provider instance, which \ - contains some global objects in obj.xxxxx, \ - and is used by process function. - - 1. **obj.slots**: a list of SlotType Object. Can be \ - set in init. For example, obj.slots = \ - [DenseSlot(9), IndexSlot(2)]. - 2. **obj.logger**: a logger object. User can invoke \ - obj.logger.info(), obj.logger.fatal(), etc. - - - **args** and **kwargs**: the data provider __init__ \ - parameters. For example, load_data_args \ - will be found in \*\*kwargs, \ - and if you want to recieve \ - it from trainer_config, \ - recommand to use init_hook_wrapper - :type init_hook: callable - - :param pool_size: - - **int**: it will read at most pool_size files to memory. - - **PoolSize**: it will read at most PoolSize.size samples to memory. 
-def provider(slots=None,
-             use_seq=False,
-             should_shuffle=True,
-             pool_size=1,
-             can_over_batch_size=True,
-             calc_batch_size=lambda data: 1,
-             debug=False,
-             init_hook=default_init_hook,
-             profile_filename=None):
-    """
-    The decorator for PyDataProvider. Use it to create a Provider class;
-    you only need to care about how to read samples from a file.
-
-    So the basic usage is:
-
-    ..  code-block:: python
-
-        @provider(some data provider config here...)
-        def process(obj, file_name):
-            while not at end of file_name:
-                sample = readOneSampleFromFile(file_name)
-                yield sample
-
-    The configuration of the data provider should be set up by:
-
-    :param init_hook: A callback invoked when the PyDataProvider instance is \
-                      created. The parameters are (obj, \*args, \*\*kwargs).
-
-                      - **obj**: the actual data provider instance, which \
-                                 holds some global objects as obj.xxxxx \
-                                 and is used by the process function.
-
-                        1. **obj.slots**: a list of SlotType objects. Can be \
-                           set in init. For example, obj.slots = \
-                           [DenseSlot(9), IndexSlot(2)].
-                        2. **obj.logger**: a logger object. You can invoke \
-                           obj.logger.info(), obj.logger.fatal(), etc.
-
-                      - **args** and **kwargs**: the data provider __init__ \
-                                parameters. For example, load_data_args \
-                                will be found in \*\*kwargs; if you want to \
-                                receive it from trainer_config, it is \
-                                recommended to use init_hook_wrapper.
-    :type init_hook: callable
-
-    :param pool_size:
-      - **int**: read at most pool_size files into memory.
-      - **PoolSize**: read at most PoolSize.size samples into memory.
-      - If not set, read all the files into memory.
-    :type pool_size: int | PoolSize
-
-    :param slots: Specify the SlotTypes; can also be set in init_hook. It has two formats:
-
-                  - A list of SlotType objects. For example, slots = \
-                    [DenseSlot(9), IndexSlot(2)].
-                  - A method returning a list of SlotTypes, whose parameters \
-                    are (obj, \*file_list, \*\*kwargs).
-    :type slots: list | callable
-
-    :param use_seq: False if no sequence is used (default). True if sequences are used:
-
-                    - If a sequence has **no sub-sequence**: each slot returns \
-                      a list of data, and this list is one sequence. So the \
-                      return format looks like \
-                      [[a0, a1, a2], [b1, b2, b3, b4], [c1]].
-                    - If a sequence has **sub-sequences**: each slot returns a \
-                      nested list of data. This list contains several \
-                      sub-lists, each of which is one sub-sequence. So the \
-                      return format looks like \
-                      [[[a0, a1, a2], [a4, a5]], [[b1, b2, b3, b4], [b5, b6]], [[c1], [c2]]].
-    :type use_seq: bool
-
-    :param should_shuffle: True if the data should be shuffled.
-    :type should_shuffle: bool
-
-    :param calc_batch_size: The method that calculates each sample's batch size.
-
-                            - The default is a batch size of one per sample.
-                            - You can customize it with a **lambda** function. \
-                              For example, \
-                              :code:`calc_batch_size = lambda data: len(data)` \
-                              counts the number of tokens in a sequence sample.
-    :type calc_batch_size: callable
-
-    :param can_over_batch_size: Whether :code:`actual batch size >= input batch size`
-
-                                - **True** (>=): getNextBatch may return more data (default).
-                                - **False** (<): you must ensure that each sample's batch size is less than the input batch size.
-    :type can_over_batch_size: bool
-
-    :param debug: True to enable the debug logger and some debug checks. Default is False.
-    :type debug: bool
-
-    :param profile_filename: None to disable profiling (default). Otherwise, \
-                             the data provider dumps a profile result on each \
-                             reset, to a file named \
-                             **<profile_filename>_<profile_count>** (see the \
-                             reset logic above).
-    :type profile_filename: None | str
-    """
-
-    def _wrapper(handler):
-        class Cls(GeneralPyDataProvider):
-            """ Real PyDataProvider Class. """
-
-            def __init__(self, *file_list, **kwargs):
-                logging.basicConfig(
-                    format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
-                    " %(message)s")
-
-                self.logger = logging.getLogger("")
-                if debug:
-                    self.logger.setLevel(logging.DEBUG)
-                    self.logger.debug("Running pydataprovider in debug mode.")
-                else:
-                    self.logger.setLevel(logging.INFO)
-
-                init_hook(self, *file_list, **kwargs)
-                if callable(slots):
-                    self.slots = slots(self, *file_list, **kwargs)
-                elif slots is not None:
-                    self.slots = slots
-
-                if isinstance(pool_size, int):
-                    self.max_pool_size = 0
-                    self.file_count = pool_size
-                elif isinstance(pool_size, PoolSize):
-                    self.max_pool_size = pool_size.size
-                    self.file_count = 0
-                else:
-                    raise RuntimeError
-                self.can_over_batch_size = can_over_batch_size
-                self.debug = debug
-                self.profile_filename = profile_filename
-                self.use_seq_flag = use_seq
-                self.should_shuffle = should_shuffle
-                GeneralPyDataProvider.__init__(self, *file_list, **kwargs)
-
-            def getSlots(self):
-                return self.slots
-
-            def generateData(self, f):
-                return handler(self, f)
-
-            def calculateDataBatchSize(self, data):
-                return calc_batch_size(data)
-
-        return Cls
-
-    return _wrapper
-
-
-def init_hook_wrapper(func):
-    """
-    Wrap a method for PyDataProviderWrapper's init_hook. This method can
-    receive parameters from trainer_config's load_data_args. The load_data_args
-    must pass a pickle.dumps() value that unpickles to a map of keyword args.
The - wrapped method :code:`func` will receive them as keyword args. - - So an example usage is: - - .. code-block:: python - - @init_hook_wrapper - def hook(obj, dictionary, file_list, **kwargs): - obj.dictionary = dictionary - obj.slots = [IndexSlot(len(obj.dictionary)), - IndexSlot(len(open(file_list[0], "r").readlines()))] - - :param func: init_hook function - :type func: callable - :return: wrapped method, can be passed into @provider. - """ - - @functools.wraps(func) - def wrapper(obj, *file_list, **kwargs): - args = kwargs.get("load_data_args", dict()) - if isinstance(args, basestring): - args = pickle.loads(args) - args['file_list'] = file_list - func(obj=obj, **args) - - return wrapper diff --git a/python/paddle/trainer/__init__.py b/python/paddle/trainer/__init__.py deleted file mode 100644 index f662d68263..0000000000 --- a/python/paddle/trainer/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py deleted file mode 100644 index 5b90facd49..0000000000 --- a/python/paddle/trainer/config_parser.py +++ /dev/null @@ -1,4447 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -''' -The following functions are available in the config file: - -Bias: define bias. To be used as value of bias argument in Layer(). - -Data: define data provider. - -Input: define input layer for a layer. To be used as element of inputs argument - in Layer(). - -Conv: define a convolution operation for an input of a layer. - -Norm: define a normalization operation for an input of a layer. - -Pool: define a pooling operation for an input of a layer. - -Layer: define a layer. - -Parameter: define a parameter. - -Import: import another config file. If the imported config file name is - a relative path, then it will be searched under the directory of the - current config file. - -Inputs(layer_names...): - Define the name of the input layers of the NeuralNetwork. - The type of these layers must be "data". - These layers will be provided with the DataBatch obtained - from DataProvider. The data streams from DataProvider must - have the same order. - -Outputs(layer_names...): - Define the name of the output layers of the NeuralNetwork. - Usually the output is simply the cost layer. 
- You can specify other layers as outputs and calculate the - cost (and its derivative) yourself. - - -default_initial_std(val) -default_initial_mean(val) -default_momentum(val): -default_decay_rate(val): Set the default value for these parameters - - -get_config_arg(name, type, default): Get the value for a config parameter. - - -*** customized extension to config_parser *** -The functionality of the config_parser can be extended. -If the config_arg_str for parse_config() contains -extension_module_name=[MODULE_NAME], then config_parser will call -MODULE_NAME.get_config_funcs(g_config) -MODULE_NAME.get_config_funcs() should return a dictionary of name to functions, -those functions will be available in the config file. -See legacy/trainer/tests/config_parser_test.py for example - -To use this from paddle_trainer, paddle_trainer should be called with ---config_args=extension_module_name=[MODULE_NAME] - -''' -import copy -import logging -import os -import sys -import traceback -import math -import shutil - -try: - from paddle.proto.DataConfig_pb2 import DataConfig - from paddle.proto.ModelConfig_pb2 import ModelConfig - from paddle.proto.ModelConfig_pb2 import LayerConfig - from paddle.proto.ModelConfig_pb2 import LayerInputConfig - from paddle.proto.ModelConfig_pb2 import ProjectionConfig - from paddle.proto.ModelConfig_pb2 import OperatorConfig - from paddle.proto.ModelConfig_pb2 import GeneratorConfig - from paddle.proto.ModelConfig_pb2 import LinkConfig - from paddle.proto.ParameterConfig_pb2 import ParameterConfig - from paddle.proto.ParameterConfig_pb2 import ParameterUpdaterHookConfig - from paddle.proto.TrainerConfig_pb2 import TrainerConfig - -except Exception as e: - traceback.print_exc() - raise - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', ) -logger = logging.getLogger('paddle') -logger.setLevel(logging.INFO) -__real_print__ = print -print = logger.info - -# from layer type name to layer class -g_layer_type_map = {} - - -# Initialize global variables. We use this function so that we can -# call parse_config() multiple times -def init_config_environment( - g_default_momentum=None, - g_default_decay_rate=None, - g_default_initial_mean=0., - g_default_initial_std=0.01, - g_default_num_batches_regularization=None, - g_default_initial_strategy=0, - g_default_initial_smart=False, - g_default_gradient_clipping_threshold=None, - g_default_device=None, - g_default_update_hooks=None, - g_default_compact_func=None, - g_config=TrainerConfig(), - g_layer_map={}, - g_parameter_map={}, - g_parameter_initializer_map={}, - g_extended_config_funcs={}, - - # store command args of paddle_trainer - g_command_config_args={}, - - # Used for PyDataProvider to avoid duplicate module name - g_py_module_name_list=[], - g_current_submodel=None, - g_root_submodel=None, - g_submodel_map={}, - g_submodel_stack=[], - g_add_submodel_suffix=False, ): - - # directly iterate through locals().iteritems() will change - # the size of locals() due to introducing k, v into scope - # which will break the process in some env - - local_vars = copy.deepcopy(locals()) - for k, v in local_vars.iteritems(): - globals()[k] = v - - -# Because type is widely used as a variable name in this code. 
-# we need a different function name for the builtin type() -def type_of(x): - return type(x) - - -# Check a condition derived config file -def config_assert(b, msg): - if not b: - logger.fatal(msg) - - -g_config_funcs = {} - - -# decorator for indicating a function which can be used in config file -def config_func(func): - g_config_funcs[func.func_name] = func - return func - - -# decorator for indicating a class which can be used in config file -def config_class(cls): - g_config_funcs[cls.__name__] = cls - return cls - - -# decorator for indicating a class for a layer type -def config_layer(layer_type): - def wrap(cls): - g_config_funcs[cls.__name__] = cls - g_layer_type_map[layer_type] = cls - return cls - - return wrap - - -def gen_parameter_name(layer_name, input_index): - return '_%s.w%d' % (layer_name, input_index) - - -def gen_bias_parameter_name(layer_name): - return '_%s.wbias' % layer_name - - -def default(x, default_value): - return default_value if x is None else x - - -class Cfg(object): - def add_keys(self, locals): - for k, v in locals.iteritems(): - if not k.startswith('_'): - self.__setattr__(k, v) - - -# functions available in config file - - -# Define the name of the input layers of the NeuralNetwork. -# The type of these layers must be "data". -# These layers will be provided with the DataBatch obtained -# from DataProvider. The data streams from DataProvider must -# have the same order. -@config_func -def Inputs(*args): - for name in args: - name = MakeLayerNameInSubmodel(name) - global g_current_submodel, g_root_submodel - if g_current_submodel.is_recurrent_layer_group: - config_assert(False, "Do not set Inputs in recurrent layer group") - else: - g_current_submodel.input_layer_names.append(name) - - if g_current_submodel is g_root_submodel: - g_config.model_config.input_layer_names.append(name) - - -@config_func -def HasInputsSet(): - return len(g_current_submodel.input_layer_names) != 0 - - -# Define the name of the output layers of the NeuralNetwork. -# Usually the output is simply the cost layer. -# You can specify other layers as outputs and calculate the -# cost (and its derivative) yourself. 
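As context for Inputs() above and Outputs() just below, a hedged sketch of a trainer config fragment; the layer names, and the exact Layer() call style, are illustrative assumptions rather than code from this patch:

..  code-block:: python

    Inputs("image", "label")            # must name layers of type "data"
    Layer(name="image", type="data", size=784)
    Layer(name="label", type="data", size=10)
    # ... intermediate layers ending in a cost layer named "cost" ...
    Outputs("cost")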
-@config_func -def Outputs(*args): - for name in args: - name = MakeLayerNameInSubmodel(name) - global g_current_submodel, g_root_submodel - if g_current_submodel.is_recurrent_layer_group: - config_assert(False, "Do not set Outputs in recurrent layer group") - else: - g_current_submodel.output_layer_names.append(name) - - if g_current_submodel is g_root_submodel: - g_config.model_config.output_layer_names.append(name) - - -@config_func -def SubModelBegin(name): - global g_current_submodel, g_root_submodel, g_submodel_stack - g_submodel_stack.append(g_current_submodel) - - name = MakeLayerNameInParentSubmodel(name) #rename in nested submodel - - config_assert(name not in g_submodel_map, - 'Duplicated submodel name: %s' % name) - - sub_model = g_config.model_config.sub_models.add() - sub_model.name = name - g_submodel_map[name] = sub_model - g_current_submodel = sub_model - - -@config_func -def SubModelEnd(name=None): - global g_current_submodel, g_root_submodel, g_submodel_stack - config_assert(g_current_submodel is not g_root_submodel, - "submodel not begin") - if name is not None: - config_assert( - g_current_submodel.name == MakeLayerNameInParentSubmodel(name), - "submodel name error") - - g_current_submodel = g_submodel_stack.pop() - - -def MakeLayerNameInParentSubmodel(name): - suffix = "" - if len(g_submodel_stack) > 1: - suffix = "@" + g_submodel_stack[-1].name - return name + suffix - - -def GetLayerBaseName(name): - return name.split('@')[0] - - -def MakeLayerNameInSubmodel(name, submodel_name=None): - global g_current_submodel - global g_add_submodel_suffix - if (submodel_name is None and not g_add_submodel_suffix and - not g_current_submodel.is_recurrent_layer_group): - return name - if submodel_name is None: - submodel_name = g_current_submodel.name - return name + "@" + submodel_name - - -# Define a recurrent layer group begin with RecurrentLayerGroupBegin -# and end with RecurrentLayerGroupEnd. -# A recurrent layer group forward/backward one frame after previous frame -# forward/backward through all layers in layer group. -# in_links are names of layer used as input layer in the layer group. -# out_links are names of layer in layer group used as outside layer's input. -# -# If generator is set, the layer group need one or more than one outlinks. -# The first outlink should always be the generated token ids. -# If generator.num_results_per_sample is not set, the output for one sample is -# a ids sequence. Else if num_results_per_sample is more than one, -# the output for one sample is up to #num_results_per_sample generated -# sequences, which are packed in one sequence in output ids vector. Each -# generated sequence has a generation probability. The probabilities for one -# sample are stored in one row of output value matrix. -# Packed generated sequences format, for each i: -# seq_i_length: one interger, seq_i content length, -# [seq_i content], length = seq_i_length -# seq_i_end_mark: one interger, for format check, always -1 -# You can use "seq_text_printer" to print the output of the generator. 
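The begin/end bracketing implemented by the functions below can be summarized with a short sketch; the group, layer, and link names here are invented:

..  code-block:: python

    RecurrentLayerGroupBegin("decoder", in_links=["encoded_seq"],
                             out_links=["decoder_out"])
    # ... define the per-frame layers and memories of the group here ...
    RecurrentLayerGroupEnd("decoder")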
-@config_func -def RecurrentLayerGroupWithoutOutLinksBegin(name, - in_links, - seq_reversed=False, - target_inlinkname=""): - global g_current_submodel - config_assert(g_config.model_config.type == "recurrent_nn", - "RecurrentLayerGroup should be used only in recurrent_nn") - RecurrentLayerGroup(name=name) # add to father model - SubModelBegin(name) - g_current_submodel.is_recurrent_layer_group = True - g_current_submodel.reversed = seq_reversed - in_links_count = 0 - for linkid, link in enumerate(in_links): - if isinstance(link, basestring): - name = link - else: - name = link.link_name - - in_links_count += 1 - layer_name = MakeLayerNameInParentSubmodel(name) - layer = g_layer_map[layer_name] - ScatterAgentLayer( - name=name, size=layer.size, width=layer.width, height=layer.height) - - pair = g_current_submodel.in_links.add() - pair.layer_name = layer_name - pair.link_name = MakeLayerNameInSubmodel(name) - - -@config_func -def RecurrentLayerGroupSetOutLink(link): - if isinstance(link, basestring): - name = link - else: - name = link.link_name - layer_name = MakeLayerNameInParentSubmodel(name) - pair = g_current_submodel.out_links.add() - pair.layer_name = MakeLayerNameInSubmodel(name) - pair.link_name = layer_name - - -def RecurrentLayerGroupSetGenerator(generator=None): - generator.eos_layer_name = MakeLayerNameInSubmodel(generator.eos_layer_name) - g_current_submodel.generator.CopyFrom(generator) - - -@config_func -def RecurrentLayerGroupBegin(name, - in_links, - out_links, - generator=None, - target_inlinkname="", - seq_reversed=False): - RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed) - for link in out_links: - RecurrentLayerGroupSetOutLink(link) - - if generator is not None: - RecurrentLayerGroupSetGenerator(generator) - config_assert( - len(in_links) == 0, "no in_links should be passed to generator") - config_assert( - len(out_links) >= 1, - "one or more than one out_links should be passed to generator") - - -@config_func -def RecurrentLayerGroupEnd(name): - global g_current_submodel - config_assert(g_current_submodel.is_recurrent_layer_group, - "RecurrentLayerGroup not begin") - for pair in g_current_submodel.memories: #check exist - layer = g_layer_map[pair.layer_name] - config_assert(layer is not None, - "memory declare wrong name:%s" % pair.layer_name) - memory_link = g_layer_map[pair.link_name] - config_assert(layer.size == memory_link.size, - "memory declare wrong size:%d" % memory_link.size) - - prev_submodel = g_current_submodel - SubModelEnd(name) - - for pair in prev_submodel.out_links: - layer = g_layer_map[pair.layer_name] - # add out agent to father model - agent_name = GetLayerBaseName(pair.link_name) - if prev_submodel.HasField("generator"): - DataLayer(name=agent_name, size=layer.size) - else: - GatherAgentLayer(name=agent_name, size=layer.size) - - -# Define the model type -# currently, the paddle supports "nn", "recurrent_nn", "recursive_nn" and "multi_nn" -@config_func -def model_type(name): - g_config.model_config.type = name - - -@config_class -class Bias(Cfg): - def __init__(self, - parameter_name=None, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - num_batches_regularization=None, - sparse_remote_update=None, - gradient_clipping_threshold=None, - is_static=None, - is_shared=None, - initializer=None): - self.add_keys(locals()) - - -# Define one input for a layer -@config_class -class Input(Cfg): - def __init__( - 
self, - input_layer_name, - parameter_name=None, - initializer=None, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - num_batches_regularization=None, - sparse_remote_update=None, - sparse_update=None, - gradient_clipping_threshold=None, - conv=None, - bilinear_interp=None, - norm=None, - pool=None, - image=None, - block_expand=None, - maxout=None, - spp=None, - pad=None, - upsample=None, - format=None, - nnz=None, - is_static=None, - is_shared=None, - update_hooks=None, - input_layer_argument=None, - make_layer_name_in_submodel=True, ): - """ - @param make_layer_name_in_submodel True by defalut, you might need to - set it carefully when adding Input in config_parser.py. - """ - self.add_keys(locals()) - self.input_layer_name = MakeLayerNameInSubmodel( - input_layer_name - ) if make_layer_name_in_submodel else input_layer_name - - -# Define a projection for iexed layer -@config_class -class Projection(Input): - type = None # subclass should set it correctly - - def __init__( - self, - input_layer_name, - size=0, # projection output size - parameter_name=None, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - initializer=None, - num_batches_regularization=None, - sparse_remote_update=None, - sparse_update=None, - gradient_clipping_threshold=None, - ptype=None, - format=None, - nnz=None, - is_static=None, - is_shared=None, - update_hooks=None, - input_layer_argument=None, ): - self.add_keys(locals()) - self.input_layer_name = MakeLayerNameInSubmodel(input_layer_name) - - self.proj_conf = ProjectionConfig() - if ptype is not None: - self.proj_conf.type = ptype - else: - self.proj_conf.type = self.type - - # calculate the output_size given input_size. 
return 0
-    # to indicate using the size from Layer config
-    def calc_output_size(self, input_layer_config):
-        return self.size
-
-    def calc_parameter_size(self, input_size, output_size):
-        raise NotImplementedError
-
-    def calc_parameter_dims(self, input_size, output_size):
-        raise NotImplementedError
-
-
-@config_class
-class IdentityProjection(Projection):
-    type = 'identity'
-
-    def calc_output_size(self, input_layer_config):
-        return input_layer_config.size
-
-    def calc_parameter_size(self, input_size, output_size):
-        return 0
-
-    def calc_parameter_dims(self, input_size, output_size):
-        return []
-
-
-# Like IdentityProjection, but the layer size may be smaller than the input
-# size; the projection selects dimensions [offset, offset+layer_size) from
-# the input.
-@config_class
-class IdentityOffsetProjection(Projection):
-    type = 'identity_offset'
-
-    def __init__(self, input_layer_name, offset, **xargs):
-        super(IdentityOffsetProjection, self).__init__(input_layer_name,
-                                                       **xargs)
-        self.proj_conf.offset = offset
-
-    def calc_output_size(self, input_layer_config):
-        return 0  # depends on the outside MixedLayer
-
-    def calc_parameter_size(self, input_size, output_size):
-        return 0
-
-    def calc_parameter_dims(self, input_size, output_size):
-        return []
-
-
-@config_class
-class SliceProjection(Projection):
-    type = 'slice'
-
-    def __init__(self, input_layer_name, slices, **xargs):
-        super(SliceProjection, self).__init__(input_layer_name, **xargs)
-        input = g_layer_map[input_layer_name]
-        if input.type in ["exconv", "cudnn_conv"]:
-            # the slice operator is for the channel dimension
-            assert input.num_filters is not None
-            channels = input.num_filters
-            image_size = input.size / channels
-            assert slices[len(slices) - 1][1] <= channels
-            for i in xrange(len(slices)):
-                slice = self.proj_conf.slices.add()
-                slice.start = slices[i][0] * image_size
-                slice.end = slices[i][1] * image_size
-                self.size += slice.end - slice.start
-        else:
-            config_assert(False,
-                          'Currently the input should be a convolution layer')
-
-    def calc_parameter_size(self, input_size, output_size):
-        return 0
-
-    def calc_parameter_dims(self, input_size, output_size):
-        return []
-
-
-# DotMulProjection performs element-wise multiplication with its weight
-@config_class
-class DotMulProjection(Projection):
-    type = 'dot_mul'
-
-    def calc_output_size(self, input_layer_config):
-        return input_layer_config.size
-
-    def calc_parameter_size(self, input_size, output_size):
-        return output_size
-
-    def calc_parameter_dims(self, input_size, output_size):
-        return [1, output_size]
-
-
-# ScalingProjection
-@config_class
-class ScalingProjection(Projection):
-    type = 'scaling'
-
-    def calc_output_size(self, input_layer_config):
-        return input_layer_config.size
-
-    def calc_parameter_size(self, input_size, output_size):
-        return 1
-
-    def calc_parameter_dims(self, input_size, output_size):
-        return [1, 1]
-
-
-@config_class
-class TableProjection(Projection):
-    type = 'table'
-
-    def calc_parameter_size(self, input_size, output_size):
-        return input_size * output_size
-
-    def calc_parameter_dims(self, input_size, output_size):
-        return [input_size, output_size]
-
-
-@config_class
-class FullMatrixProjection(Projection):
-    type = 'fc'
-
-    def calc_parameter_size(self, input_size, output_size):
-        return input_size * output_size
-
-    def calc_parameter_dims(self, input_size, output_size):
-        return [input_size, output_size]
-
-
-@config_class
-class TransposedFullMatrixProjection(Projection):
-    type = 'trans_fc'
-
-    def calc_parameter_size(self, input_size,
output_size): - return input_size * output_size - - def calc_parameter_dims(self, input_size, output_size): - return [output_size, input_size] - - -@config_class -class ContextProjection(Projection): - type = 'context' - - def __init__(self, input_layer_name, context_start, context_length, - trainable_padding, **xargs): - super(ContextProjection, self).__init__(input_layer_name, **xargs) - self.proj_conf.context_start = context_start - self.proj_conf.context_length = context_length - self.proj_conf.trainable_padding = trainable_padding - self._total_pad = max(0, -self.proj_conf.context_start) \ - + max(0, self.proj_conf.context_start \ - + self.proj_conf.context_length - 1) - - def calc_output_size(self, input_layer_config): - return input_layer_config.size * self.proj_conf.context_length - - def calc_parameter_size(self, input_size, output_size): - if self.proj_conf.trainable_padding == False: - return 0 - else: - return input_size * self._total_pad - - def calc_parameter_dims(self, input_size, output_size): - return [self._total_pad, input_size] - - _total_pad = 0 - - -@config_class -class ConvBaseProjection(Projection): - def __init__(self, - input_layer_name, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvBaseProjection, self).__init__(input_layer_name, **xargs) - - if num_filters is not None: - self.proj_conf.num_filters = num_filters - - def calc_output_size(self, input_layer_config): - return self.proj_conf.output_size - - def calc_parameter_size(self, input_size, output_size): - co = self.proj_conf.num_filters - ci = self.proj_conf.conv_conf.channels - fh = self.proj_conf.conv_conf.filter_size - fw = self.proj_conf.conv_conf.filter_size_y - gr = self.proj_conf.conv_conf.groups - return co * ci * fh * fw / gr - - def calc_bias_size(self): - return self.proj_conf.num_filters - - def calc_parameter_dims(self, input_size, output_size): - return None - - -@config_class -class ConvProjection(ConvBaseProjection): - type = 'conv' - - def __init__(self, - input_layer_name, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvProjection, self).__init__(input_layer_name, num_filters, - conv_conf, **xargs) - - parse_conv(conv_conf, self.input_layer_name, self.proj_conf.conv_conf, - num_filters) - self.proj_conf.output_size = self.proj_conf.conv_conf.output_x * \ - self.proj_conf.conv_conf.output_y * \ - num_filters - - -@config_class -class ConvTransProjection(ConvBaseProjection): - type = 'convt' - - def __init__(self, - input_layer_name, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvTransProjection, self).__init__(input_layer_name, num_filters, - conv_conf, **xargs) - - parse_conv( - conv_conf, - self.input_layer_name, - self.proj_conf.conv_conf, - num_filters, - trans=True) - self.proj_conf.output_size = self.proj_conf.conv_conf.img_size_y * \ - self.proj_conf.conv_conf.img_size * \ - num_filters - - -# Define a operator for mixed layer -@config_class -class Operator(Cfg): - type = None # subclass should set it correctly - - def __init__( - self, - input_layer_names, ): - self.add_keys(locals()) - self.operator_conf = OperatorConfig() - self.operator_conf.type = self.type - - def check_dims(self): - pass - - def calc_output_size(self, input_sizes): - return 0 - - -@config_class -class DotMulOperator(Operator): - type = 'dot_mul' - - def __init__(self, input_layer_names, scale=None, **xargs): - super(DotMulOperator, self).__init__(input_layer_names, **xargs) - if scale is not None: - self.operator_conf.dotmul_scale = scale - - 
config_assert(len(input_layer_names) == 2, "DotMul is binary operator") - - def check_dims(self): - for i in range(2): - config_assert(self.operator_conf.input_sizes[i] == - self.operator_conf.output_size, - "DotMul input_size != output_size") - - def calc_output_size(self, input_sizes): - return input_sizes[0] - - -@config_class -class ConvOperator(Operator): - type = 'conv' - - def __init__(self, - input_layer_names, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvOperator, self).__init__(input_layer_names, **xargs) - if num_filters is not None: - self.operator_conf.num_filters = num_filters - - parse_conv(conv_conf, - MakeLayerNameInSubmodel(input_layer_names[0]), - self.operator_conf.conv_conf, num_filters) - self.operator_conf.output_size = self.operator_conf.conv_conf.output_x * \ - self.operator_conf.conv_conf.output_y * \ - num_filters - - config_assert(len(input_layer_names) == 2, "Conv is binary operator") - - def calc_output_size(self, input_sizes): - return self.operator_conf.output_size - - -@config_class -class ConvTransOperator(Operator): - type = 'convt' - - def __init__(self, - input_layer_names, - num_filters=None, - conv_conf=None, - **xargs): - super(ConvTransOperator, self).__init__(input_layer_names, **xargs) - if num_filters is not None: - self.operator_conf.num_filters = num_filters - - parse_conv( - conv_conf, - MakeLayerNameInSubmodel(input_layer_names[0]), - self.operator_conf.conv_conf, - num_filters, - trans=True) - self.operator_conf.output_size = \ - self.operator_conf.conv_conf.img_size * \ - self.operator_conf.conv_conf.img_size_y * \ - num_filters - - config_assert(len(input_layer_names) == 2, "Conv is binary operator") - - def calc_output_size(self, input_sizes): - return self.operator_conf.output_size - - -# please refer to the comments in proto/ModelConfig.proto -@config_class -class Conv(Cfg): - def __init__(self, - filter_size, - channels, - padding=None, - stride=None, - groups=None, - filter_channels=None, - output_x=None, - img_size=None, - caffe_mode=True, - filter_size_y=None, - padding_y=None, - stride_y=None, - dilation=None, - dilation_y=None): - self.add_keys(locals()) - if filter_size_y is None: - self.filter_size_y = filter_size - if padding_y is None: - self.padding_y = padding - if dilation_y is None: - self.dilation_y = dilation - if stride_y is None: - self.stride_y = stride - if output_x is not None: - config_assert(output_x <= 0) - - -# please refer to the comments in proto/ModelConfig.proto -@config_class -class Conv3D(Cfg): - def __init__(self, - filter_size, - channels, - padding=None, - stride=None, - groups=None, - filter_channels=None, - output_x=None, - img_size=None, - caffe_mode=True, - filter_size_y=None, - padding_y=None, - stride_y=None, - filter_size_z=None, - padding_z=None, - stride_z=None): - self.add_keys(locals()) - self.filter_size_y = filter_size_y if filter_size_y else filter_size - self.filter_size_z = filter_size_z if filter_size_z else filter_size - self.padding_y = padding_y if padding_y else padding - self.padding_z = padding_z if padding_z else padding - self.stride_y = stride_y if stride_y else stride - self.stride_z = stride_z if stride_z else stride - if output_x is not None: - config_assert(output_x <= 0) - - -@config_class -class BilinearInterp(Cfg): - def __init__(self, out_size_x=None, out_size_y=None, channels=None): - self.add_keys(locals()) - - -@config_class -class Pool(Cfg): - def __init__( - self, - pool_type, - channels, - size_x, - size_y=None, - start=None, - stride=None, # 1 by 
defalut in protobuf - stride_y=None, - padding=None, # 0 by defalut in protobuf - padding_y=None): - self.add_keys(locals()) - - -@config_class -class Pool3d(Cfg): - def __init__( - self, - pool_type, - channels, - size_x, - size_y=None, - size_z=None, - start=None, - stride=None, # 1 by defalut in protobuf - stride_y=None, - stride_z=None, - padding=None, # 0 by defalut in protobuf - padding_y=None, - padding_z=None): - self.add_keys(locals()) - self.filter_size_y = size_y if size_y else size_x - self.filter_size_z = size_z if size_z else size_x - self.padding_y = padding_y if padding_y else padding - self.padding_z = padding_z if padding_z else padding - self.stride_y = stride_y if stride_y else stride - self.stride_z = stride_z if stride_z else stride - - -@config_class -class SpatialPyramidPool(Cfg): - def __init__(self, pool_type, pyramid_height, channels): - self.add_keys(locals()) - - -@config_class -class Pad(Cfg): - def __init__(self, channels, pad_c, pad_h, pad_w): - self.add_keys(locals()) - - -@config_class -class Upsample(Cfg): - def __init__(self, scale, scale_y, pad_out_x, pad_out_y, upsample_size, - upsample_size_y): - self.add_keys(locals()) - - -@config_class -class Norm(Cfg): - def __init__(self, - norm_type, - channels, - size, - scale, - pow, - output_x=None, - img_size=None, - blocked=None): - self.add_keys(locals()) - - -@config_class -class Image(Cfg): - def __init__(self, channels, img_size=None): - self.add_keys(locals()) - - -@config_class -class BlockExpand(Cfg): - def __init__(self, - channels, - padding_x=0, - padding_y=0, - stride_x=0, - stride_y=0, - block_x=0, - block_y=0, - img_size_x=0, - img_size_y=0, - output_x=0, - output_y=0): - self.add_keys(locals()) - - -@config_class -class MaxOut(Cfg): - def __init__(self, channels, groups, img_size_x=0, img_size_y=0): - self.add_keys(locals()) - - -def create_data_config_proto(async_load_data=False, - constant_slots=None, - data_ratio=1, - is_main_data=True, - usage_ratio=None): - # default: all sub dataproviders are treat as "main data". 
- # see proto/DataConfig.proto for is_main_data - data_config = DataConfig() - - data_config.async_load_data = async_load_data - - if constant_slots: - data_config.constant_slots.extend(constant_slots) - data_config.data_ratio = data_ratio - data_config.is_main_data = is_main_data - - usage_ratio = default(usage_ratio, settings_deprecated["usage_ratio"]) - config_assert(usage_ratio >= 0 and usage_ratio <= 1, - "The range of usage_ratio is [0, 1]") - data_config.usage_ratio = usage_ratio - - return data_config - - -@config_func -def SimpleData(files=None, - feat_dim=None, - context_len=None, - buffer_capacity=None, - **xargs): - data_config = create_data_config_proto(**xargs) - data_config.type = 'simple' - data_config.files = files - data_config.feat_dim = feat_dim - if context_len is not None: - data_config.context_len = context_len - if buffer_capacity: - data_config.buffer_capacity = buffer_capacity - return data_config - - -@config_func -def PyData(files=None, - type=None, - file_group_queue_capacity=None, - load_data_module=None, - load_data_object=None, - load_data_args="", - load_file_count=None, - constant_slots=None, - load_thread_num=None, - **xargs): - data_config = create_data_config_proto(**xargs) - data_config.type = 'py' - if load_data_module in g_py_module_name_list: - - def get_path(module): - m = __import__(load_data_module) - return os.path.split(os.path.realpath(m.__file__))[0] - - # python C-api is not thread safe, one module can only be import once, - # so here we nedd to copy the module with different names if it has to be - # imported several times. - module_new_name = "%s_copy_%d" % (load_data_module, - len(g_py_module_name_list)) - g_py_module_name_list.append(module_new_name) - module_path = "%s/%s.py" % (get_path(load_data_module), - load_data_module) - new_module_path = "%s/%s.py" % (get_path(load_data_module), - module_new_name) - if os.path.isfile(module_path) == False: - raise Exception("File %s is not exist." % module_path) - shutil.copy2(module_path, new_module_path) - load_data_module = module_new_name - else: - g_py_module_name_list.append(load_data_module) - if load_data_module is not None and load_data_object is not None: - data_config.load_data_module = load_data_module - data_config.load_data_object = load_data_object - else: - raise ValueError('load_data_module, load_data_object is not defined.') - data_config.load_data_args = load_data_args - - data_config.files = files or '' - if file_group_queue_capacity is not None: - data_config.file_group_conf.queue_capacity = file_group_queue_capacity - if load_file_count is not None: - data_config.file_group_conf.load_file_count = load_file_count - if load_thread_num is not None: - data_config.file_group_conf.load_thread_num = load_thread_num - if constant_slots: - data_config.constant_slots.extend(constant_slots) - return data_config - - -#real data for training is actually provided by "sub_data" data providers. 
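A hedged sketch of wiring these data configs together; MultiData itself is defined just below, and the file and module names here are invented:

..  code-block:: python

    TrainData(MultiData(sub_data=[
        SimpleData(files="main.list", feat_dim=128, data_ratio=2),
        PyData(files="aux.list",
               load_data_module="my_provider",   # hypothetical module
               load_data_object="process",
               data_ratio=1, is_main_data=False),
    ]))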
-@config_func -def MultiData(sub_data=[]): - data_config = DataConfig() - data_config.type = 'multi' - data_config.sub_data_configs.extend(sub_data) - return data_config - - -@config_func -def Data(type, - files=None, - feat_dim=None, - slot_dims=None, - context_len=None, - buffer_capacity=None, - **xargs): - - data_config = create_data_config_proto(**xargs) - data_config.type = type - data_config.files = files - data_config.feat_dim = feat_dim - data_config.slot_dims.extend(slot_dims) - if context_len is not None: - data_config.context_len = context_len - data_config.buffer_capacity = buffer_capacity - return data_config - - -@config_func -def TrainData(data_config, async_load_data=None): - config_assert(not g_config.HasField('data_config'), - 'Only one TrainData definition is allowed') - g_config.data_config.CopyFrom(data_config) - g_config.data_config.for_test = False - if async_load_data is not None: - logger.warning("Deprecated: async_load_data should be used inside" - " Data definition") - g_config.data_config.async_load_data = async_load_data - - -@config_func -def TestData(data_config, async_load_data=None): - config_assert(not g_config.HasField('test_data_config'), - 'Only one TestData definition is allowed') - g_config.test_data_config.CopyFrom(data_config) - g_config.test_data_config.for_test = True - if async_load_data is not None: - logger.warning("Deprecated: async_load_data should be used inside" - " Data definition") - g_config.test_data_config.async_load_data = async_load_data - - -#caffe_mode: compute the output size using floor instead of ceil, -# which is consistent of caffe and CuDNN's convention. -def cnn_output_size(img_size, - filter_size, - padding, - stride, - caffe_mode, - dilation=1): - filter_s = (filter_size - 1) * dilation + 1 - output = (2 * padding + img_size - filter_s) / float(stride) - if caffe_mode: - return 1 + int(math.floor(output)) - else: - return 1 + int(math.ceil(output)) - - -#calcualte image_size based on output_size for de-convolution (ConvTransLayer). 
-#It is the reverse function of cnn_output_size -def cnn_image_size(output_size, - filter_size, - padding, - stride, - caffe_mode, - dilation=1): - filter_s = (filter_size - 1) * dilation + 1 - img_size = (output_size - 1) * stride + filter_s - 2 * padding - if not caffe_mode: - img_size = img_size + 1 - return img_size - - -def get_img_size(input_layer_name, channels): - input = g_layer_map[input_layer_name] - img_pixels = input.size / channels - img_size = input.width if input.width > 0 else int(img_pixels**0.5) - img_size_y = input.height if input.height > 0 else int(img_pixels / - img_size) - config_assert( - img_size * img_size_y == img_pixels, - "Input layer %s: Incorrect input image size %d * %d for input image pixels %d" - % (input_layer_name, img_size, img_size_y, img_pixels)) - return img_size, img_size_y - - -def get_img3d_size(input_layer_name, channels): - input = g_layer_map[input_layer_name] - img_pixels = input.size / channels - img_size = input.width - img_size_y = input.height - img_size_z = input.depth - - config_assert( - img_size * img_size_y * img_size_z == img_pixels, - "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d" - % (input_layer_name, img_size, img_size_y, img_size_z, img_pixels)) - return img_size, img_size_y, img_size_z - - -def parse_bilinear(bilinear, input_layer_name, bilinear_conf): - parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) - bilinear_conf.out_size_x = bilinear.out_size_x - bilinear_conf.out_size_y = bilinear.out_size_y - - -def parse_pool(pool, input_layer_name, pool_conf, ceil_mode, exclude_mode): - pool_conf.pool_type = pool.pool_type - config_assert(pool.pool_type in [ - 'max-projection', 'avg-projection', 'max-pool-with-mask', 'cudnn-max-pool', 'cudnn-avg-pool' - ], "pool-type %s is not in " \ - "['max-projection', 'avg-projection', 'max-pool-with-mask'," \ - "'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type) - - pool_conf.channels = pool.channels - pool_conf.size_x = pool.size_x - pool_conf.stride = pool.stride - - pool_conf.size_y = default(pool.size_y, pool_conf.size_x) - pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) - - pool_conf.img_size, pool_conf.img_size_y = \ - get_img_size(input_layer_name, pool.channels) - - config_assert(not pool.start, "start is deprecated in pooling.") - - if pool.padding is not None: - pool_conf.padding = pool.padding - pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, - pool_conf.padding, pool_conf.stride, - not ceil_mode) - pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, - pool_conf.padding_y, - pool_conf.stride_y, not ceil_mode) - if exclude_mode != None: - pool_conf.exclude_mode = exclude_mode - - -def parse_pool3d(pool, input_layer_name, pool_conf, ceil_mode): - pool_conf.pool_type = pool.pool_type - config_assert(pool.pool_type in ['max-projection', 'avg-projection'], - "pool-type %s is not in " - "['max-projection', 'avg-projection']" % pool.pool_type) - - pool_conf.channels = pool.channels - - pool_conf.size_x = pool.size_x - pool_conf.stride = pool.stride - pool_conf.padding = pool.padding - - pool_conf.size_y = default(pool.size_y, pool_conf.size_x) - pool_conf.size_z = default(pool.size_z, pool_conf.size_x) - pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) - pool_conf.stride_z = default(pool.stride_z, pool_conf.stride) - pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - 
pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) - - pool_conf.img_size, pool_conf.img_size_y, pool_conf.img_size_z = \ - get_img3d_size(input_layer_name, pool.channels) - - config_assert(not pool.start, "start is deprecated in pooling.") - - if pool.padding is not None: - pool_conf.padding = pool.padding - pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) - pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, - pool_conf.padding, pool_conf.stride, - not ceil_mode) - pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, - pool_conf.padding_y, - pool_conf.stride_y, not ceil_mode) - pool_conf.output_z = cnn_output_size(pool_conf.img_size_z, pool_conf.size_z, - pool_conf.padding_z, - pool_conf.stride_z, not ceil_mode) - - -def parse_spp(spp, input_layer_name, spp_conf): - parse_image(spp, input_layer_name, spp_conf.image_conf) - spp_conf.pool_type = spp.pool_type - config_assert(spp.pool_type in ['max-projection', 'avg-projection'], - "pool-type %s is not in " - "['max-projection', 'avg-projection']" % spp.pool_type) - spp_conf.pyramid_height = spp.pyramid_height - - -def parse_image(image, input_layer_name, image_conf): - image_conf.channels = image.channels - image_conf.img_size, image_conf.img_size_y = \ - get_img_size(input_layer_name, image_conf.channels) - - -def parse_image3d(image, input_layer_name, image_conf): - image_conf.channels = image.channels - image_conf.img_size, image_conf.img_size_y, image_conf.img_size_z = \ - get_img3d_size(input_layer_name, image_conf.channels) - - -def parse_norm(norm, input_layer_name, norm_conf): - norm_conf.norm_type = norm.norm_type - config_assert( - norm.norm_type in - ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'], - "norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]" - % norm.norm_type) - norm_conf.channels = norm.channels - norm_conf.size = norm.size - norm_conf.scale = norm.scale - norm_conf.pow = norm.pow - norm_conf.blocked = norm.blocked - - norm_conf.img_size, norm_conf.img_size_y = \ - get_img_size(input_layer_name, norm.channels) - norm_conf.output_x = norm_conf.img_size - norm_conf.output_y = norm_conf.img_size_y - if norm.norm_type in ['cmrnorm-projection']: - norm_conf.scale /= norm.size - else: - norm_conf.scale /= norm.size**2 - - -#caffe_mode: compute the output size using floor instead of ceil, -# which is consistent of caffe and CuDNN's convention. 
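Before the conv parsers below, a worked instance of the floor/ceil distinction in cnn_output_size above; the numbers are chosen only for illustration:

..  code-block:: python

    import math

    img_size, filter_size, padding, stride, dilation = 10, 3, 1, 2, 1
    filter_s = (filter_size - 1) * dilation + 1                   # 3
    output = (2 * padding + img_size - filter_s) / float(stride)  # 4.5
    print(1 + int(math.floor(output)))  # caffe_mode: 5
    print(1 + int(math.ceil(output)))   # otherwise:  6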
-def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): - conv_conf.filter_size = conv.filter_size - conv_conf.filter_size_y = conv.filter_size_y - conv_conf.channels = conv.channels - conv_conf.padding = conv.padding - conv_conf.padding_y = conv.padding_y - conv_conf.stride = conv.stride - conv_conf.stride_y = conv.stride_y - conv_conf.groups = conv.groups - conv_conf.caffe_mode = conv.caffe_mode - if not conv.dilation: - conv.dilation = 1 - conv.dilation_y = 1 - else: - conv_conf.dilation = conv.dilation - conv_conf.dilation_y = conv.dilation_y - - if not trans: - conv_conf.filter_channels = conv.channels / conv.groups - conv_conf.img_size, conv_conf.img_size_y = \ - get_img_size(input_layer_name, conv.channels) - conv_conf.output_x = cnn_output_size( - conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode, conv.dilation) - conv_conf.output_y = cnn_output_size( - conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) - else: - conv_conf.filter_channels = num_filters / conv.groups - conv_conf.output_x, conv_conf.output_y = \ - get_img_size(input_layer_name, conv.channels) - conv_conf.img_size = cnn_image_size( - conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode, conv.dilation) - conv_conf.img_size_y = cnn_image_size( - conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) - - -#caffe_mode: compute the output size using floor instead of ceil, -# which is consistent of caffe and CuDNN's convention. -def parse_conv3d(conv, input_layer_name, conv_conf, num_filters, trans=False): - conv_conf.filter_size = conv.filter_size - conv_conf.filter_size_y = conv.filter_size_y - conv_conf.filter_size_z = conv.filter_size_z - conv_conf.channels = conv.channels - conv_conf.padding = conv.padding - conv_conf.padding_y = conv.padding_y - conv_conf.padding_z = conv.padding_z - conv_conf.stride = conv.stride - conv_conf.stride_y = conv.stride_y - conv_conf.stride_z = conv.stride_z - conv_conf.groups = conv.groups - conv_conf.caffe_mode = conv.caffe_mode - - if not trans: - conv_conf.filter_channels = conv.channels / conv.groups - conv_conf.img_size, conv_conf.img_size_y, conv_conf.img_size_z = \ - get_img3d_size(input_layer_name, conv.channels) - conv_conf.output_x = cnn_output_size( - conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) - conv_conf.output_y = cnn_output_size( - conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) - conv_conf.output_z = cnn_output_size( - conv_conf.img_size_z, conv_conf.filter_size_z, conv_conf.padding_z, - conv_conf.stride_z, conv_conf.caffe_mode) - else: - conv_conf.filter_channels = num_filters / conv.groups - conv_conf.output_x, conv_conf.output_y, conv_conf.output_z = \ - get_img3d_size(input_layer_name, conv.channels) - conv_conf.img_size = cnn_image_size( - conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) - conv_conf.img_size_y = cnn_image_size( - conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) - conv_conf.img_size_z = cnn_image_size( - conv_conf.output_z, conv_conf.filter_size_z, conv_conf.padding_z, - conv_conf.stride_z, conv_conf.caffe_mode) - - -def 
parse_block_expand(block_expand, input_layer_name, block_expand_conf): - block_expand_conf.channels = block_expand.channels - block_expand_conf.stride_x = block_expand.stride_x - block_expand_conf.stride_y = block_expand.stride_y - block_expand_conf.padding_x = block_expand.padding_x - block_expand_conf.padding_y = block_expand.padding_y - block_expand_conf.block_x = block_expand.block_x - block_expand_conf.block_y = block_expand.block_y - block_expand_conf.img_size_x = block_expand.img_size_x - block_expand_conf.img_size_y = block_expand.img_size_y - if block_expand_conf.img_size_x == 0: - block_expand_conf.output_x = 0 - else: - block_expand_conf.output_x = cnn_output_size( - block_expand.img_size_x, block_expand.block_x, - block_expand.padding_x, block_expand.stride_x, False) - - if block_expand_conf.img_size_y == 0: - block_expand_conf.output_y = 0 - else: - block_expand_conf.output_y = cnn_output_size( - block_expand.img_size_y, block_expand.block_y, - block_expand.padding_y, block_expand.stride_y, False) - - -def parse_maxout(maxout, input_layer_name, maxout_conf): - parse_image(maxout, input_layer_name, maxout_conf.image_conf) - maxout_conf.groups = maxout.groups - - -# Define an evaluator -@config_func -def Evaluator(name, - type, - inputs, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - top_k=None, - delimited=None, - excluded_chunk_types=None, - overlap_threshold=None, - background_id=None, - evaluate_difficult=None, - ap_type=None): - evaluator = g_config.model_config.evaluators.add() - evaluator.type = type - evaluator.name = MakeLayerNameInSubmodel(name) - if type_of(inputs) == str: - inputs = [inputs] - - evaluator.input_layers.extend( - [MakeLayerNameInSubmodel(name) for name in inputs]) - - if chunk_scheme is not None: - evaluator.chunk_scheme = chunk_scheme - evaluator.num_chunk_types = num_chunk_types - g_current_submodel.evaluator_names.append(evaluator.name) - - if classification_threshold is not None: - evaluator.classification_threshold = classification_threshold - if positive_label is not None: - evaluator.positive_label = positive_label - if dict_file is not None: - evaluator.dict_file = dict_file - - if result_file is not None: - evaluator.result_file = result_file - if num_results is not None: - evaluator.num_results = num_results - if top_k is not None: - evaluator.top_k = top_k - if delimited is not None: - evaluator.delimited = delimited - - if excluded_chunk_types: - evaluator.excluded_chunk_types.extend(excluded_chunk_types) - - if overlap_threshold is not None: - evaluator.overlap_threshold = overlap_threshold - - if background_id is not None: - evaluator.background_id = background_id - - if evaluate_difficult is not None: - evaluator.evaluate_difficult = evaluate_difficult - - if ap_type is not None: - evaluator.ap_type = ap_type - - -class LayerBase(object): - def __init__( - self, - name, - type, - size, # size can be 0. In this case, subclass should set it. 
- inputs, - device=None, - active_type="", - drop_rate=0., - coeff=None, - error_clipping_threshold=None): - config_assert('@' not in name, - "layer name: %s contain special character @" % name) - global g_current_submodel - name = MakeLayerNameInSubmodel(name) - - config_assert(name not in g_layer_map, - 'Duplicated layer name: %s' % name) - - self.inputs = copy.deepcopy(inputs) - self.operators = [] - - if self.inputs is None: - self.inputs = [] - elif type_of(self.inputs) != list: - self.inputs = [self.inputs] - - self.config = g_config.model_config.layers.add() - assert isinstance(self.config, LayerConfig) - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - mkldnn_acts = ['relu', 'tanh', 'softmax'] - if use_mkldnn and active_type in mkldnn_acts: - active_type = "mkldnn_" + active_type - self.config.name = name - self.config.type = type - self.config.active_type = active_type - if coeff is not None: - self.config.coeff = float(coeff) - if size != 0: - self.config.size = size - if drop_rate != 0: - self.config.drop_rate = drop_rate - - if device is not None: - self.config.device = device - elif g_default_device is not None: - self.config.device = g_default_device - - if error_clipping_threshold is not None: - self.config.error_clipping_threshold = error_clipping_threshold - - for input_index in xrange(len(self.inputs)): - input = self.inputs[input_index] - input_config = None - input_layer_name = '' - if type_of(input) == str: - input_layer_name = input - input_config = Input( - input_layer_name=input, - parameter_name=gen_parameter_name(name, input_index)) - input_layer_name = input_config.input_layer_name - elif isinstance(input, Input): - input_layer_name = input.input_layer_name - input_config = input - if input_config.parameter_name is None: - input_config.parameter_name = \ - gen_parameter_name(name, input_index) - elif isinstance(input, Operator): - self.operators.append(input) - input.operator_conf.input_indices.append(input_index) - input_config = Input(input.input_layer_names[0]) - input_layer_name = input_config.input_layer_name - else: - raise ValueError('Wrong type for inputs: %s' % type_of(input)) - config_assert(input_layer_name in g_layer_map, - "Unknown input layer '%s' for layer %s" % - (input_layer_name, name)) - self.inputs[input_index] = input_config - layer_input = self.config.inputs.add() - layer_input.input_layer_name = input_config.input_layer_name - if input_config.input_layer_argument is not None: - layer_input.input_layer_argument = \ - input_config.input_layer_argument - - g_layer_map[name] = self.config - - g_current_submodel.layer_names.append(self.config.name) - - def get_input_layer(self, input_index): - return g_layer_map[self.config.inputs[input_index].input_layer_name] - - # will return the bias created if not *for_self* - def create_bias_parameter( - self, - bias, # True/False or BiasCfg - size, - dims=None, - for_self=True, # whether create bias for layer self - ): - - if size == 0: - return - if dims is None: - dims = [1, size] - - config_assert( - type_of(bias) == bool or type_of(bias) == Bias, - 'Incorrect type for bias: %s' % type_of(bias)) - - if type_of(bias) == bool: - if bias: - bias = Bias() - - if type_of(bias) == Bias: - if bias.parameter_name is None: - bias.parameter_name = gen_bias_parameter_name(self.config.name) - if bias.parameter_name not in g_parameter_map: - assert isinstance(self.config, LayerConfig) - - Parameter( - bias.parameter_name, - size, - self.config.device - if self.config.HasField('device') else 
None, - dims, - bias.learning_rate, - bias.momentum, - decay_rate=bias.decay_rate, - decay_rate_l1=bias.decay_rate_l1, - initial_mean=bias.initial_mean, - initial_std=bias.initial_std, - initial_strategy=bias.initial_strategy, - initial_smart=bias.initial_smart, - num_batches_regularization=bias.num_batches_regularization, - sparse_remote_update=bias.sparse_remote_update, - gradient_clipping_threshold=bias. - gradient_clipping_threshold, - is_static=bias.is_static, - is_shared=bias.is_shared, - initializer=bias.initializer) - if for_self: - self.config.bias_parameter_name = bias.parameter_name - else: - return bias.parameter_name - - def create_input_parameter(self, - input_index, - size, - dims=None, - sparse=None, - format=None): - if dims is None: - # TODO(yuyang18): print warning and callstack here! - dims = list() - - if size == 0: - return - - input_config = self.inputs[input_index] - - self.config.inputs[input_index].input_parameter_name = \ - input_config.parameter_name - - if input_config.parameter_name in g_parameter_map: - para = g_parameter_map[input_config.parameter_name] - config_assert(size == para.size, ( - 'Shared parameter "%s" does not ' + 'have same size: %s vs. %s') - % (input_config.parameter_name, para.size, size)) - - config_assert(dims == para.dims, ( - 'Shared parameter "%s" does not ' + 'have same dims: %s vs. %s') - % (input_config.parameter_name, para.dims, dims)) - return - - Parameter( - input_config.parameter_name, - size, - self.config.device if self.config.HasField("device") else None, - dims, - input_config.learning_rate, - input_config.momentum, - decay_rate=input_config.decay_rate, - decay_rate_l1=input_config.decay_rate_l1, - initial_mean=input_config.initial_mean, - initial_std=input_config.initial_std, - initial_strategy=input_config.initial_strategy, - initial_smart=input_config.initial_smart, - num_batches_regularization=input_config.num_batches_regularization, - sparse_remote_update=input_config.sparse_remote_update, - sparse_update=input_config.sparse_update, - gradient_clipping_threshold=input_config. 
gradient_clipping_threshold, - sparse=sparse, - format=format, - is_static=input_config.is_static, - is_shared=input_config.is_shared, - update_hooks=input_config.update_hooks, - initializer=input_config.initializer) - - def set_layer_size(self, size): - if self.config.size == 0: - self.config.size = size - else: - config_assert(self.config.size == size, - 'Different inputs result in ' + - 'different layer size at layer %s' % self.config.name) - - def set_layer_height_width(self, height, width): - self.config.height = height - self.config.width = width - - def set_layer_depth(self, depth): - self.config.depth = depth - - def set_cnn_layer(self, - input_layer_name, - height, - width, - channels, - is_print=True): - size = height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - if is_print: - print("output for %s: c = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, height, width, size)) - - -@config_layer('multi_class_cross_entropy_with_selfnorm') -class MultiClassCrossEntropySelfNormCostLayer(LayerBase): - def __init__(self, name, inputs, softmax_selfnorm_alpha=0.1, **xargs): - super(MultiClassCrossEntropySelfNormCostLayer, self).__init__( - name, 'multi_class_cross_entropy_with_selfnorm', 0, inputs, **xargs) - self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha - - -@config_layer('cross_entropy_over_beam') -class CrossEntropyOverBeamLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - config_assert(len(inputs) % 3 == 0, "The number of inputs must be a multiple of 3.") - super(CrossEntropyOverBeamLayer, self).__init__( - name, 'cross_entropy_over_beam', 0, inputs, **xargs) - input_num = len(inputs) / 3 - for i in range(input_num): - input_layer = self.get_input_layer(i * 3) - config_assert(input_layer.size == 1, ( - "Inputs for this layer are made up of " - "several triples, in which the first one is scores over " - "all candidate paths, whose size should be equal to 1.")) - - -@config_layer('fc') -class FCLayer(LayerBase): - layer_type = 'fc' - - def __init__(self, - name, - size, - inputs, - bias=True, - error_clipping_threshold=None, - **xargs): - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - use_mkldnn_wgt = bool( - int(g_command_config_args.get("use_mkldnn_wgt", 0))) - if use_mkldnn: - self.layer_type = 'mkldnn_fc' - config_assert( - len(inputs) == 1, - "MKLDNNFCLayer supports one and only one input!") - super(FCLayer, self).__init__( - name, self.layer_type, size, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - psize = self.config.size * input_layer.size - dims = [input_layer.size, self.config.size] - format = self.inputs[input_index].format - sparse = format == "csr" or format == "csc" - if use_mkldnn: - config_assert(not sparse, - "MKLDNNFCLayer does not support the sparse format yet") - if use_mkldnn_wgt: - dims = [self.config.size, input_layer.size] - if sparse: - psize = self.inputs[input_index].nnz - else: - sparse = None - - self.create_input_parameter(input_index, psize, dims, sparse, - format) - self.create_bias_parameter(bias, self.config.size) - if error_clipping_threshold is not None: - self.config.error_clipping_threshold = error_clipping_threshold - - -@config_layer('mkldnn_fc') -class MKLDNNFcLayer(FCLayer): - layer_type = 'mkldnn_fc' - - -@config_layer('selective_fc') -class SelectiveFCLayer(LayerBase): - def __init__(self, - name, - size, - inputs, - bias=True, - selective_fc_pass_generation=False, -
has_selected_colums=True, - selective_fc_full_mul_ratio=0.02, - selective_fc_parallel_plain_mul_thread_num=None, - **xargs): - super(SelectiveFCLayer, self).__init__( - name, 'selective_fc', size, inputs=inputs, **xargs) - # The user MUST know that if selective_fc is used in training, - # the parameter matrices saved by this layer are automatically transposed, - # BUT the bias is not. - - # If selective_fc is used only in testing mode, and the parameters for - # this layer were trained with fully connected layers, - # then TransposedFullMatrixProjection MUST be used in training - # to avoid a manual transpose in testing. - - self.config.selective_fc_pass_generation = selective_fc_pass_generation - self.config.has_selected_colums = has_selected_colums - self.config.selective_fc_full_mul_ratio = selective_fc_full_mul_ratio - if selective_fc_parallel_plain_mul_thread_num is not None: - self.config.selective_fc_parallel_plain_mul_thread_num = selective_fc_parallel_plain_mul_thread_num - - input_num = len(self.inputs) - if has_selected_colums: - config_assert(input_num >= 2, - ("when the indices of selected columns are specified, " - "the selective_fc layer must have at least two inputs")) - input_num -= 1 - - for input_index in xrange(input_num): - input_layer = self.get_input_layer(input_index) - psize = self.config.size * input_layer.size - dims = [input_layer.size, self.config.size] - dims = dims[::-1] # transpose the parameter - format = self.inputs[input_index].format - sparse = format == "csr" or format == "csc" - if sparse: - psize = self.inputs[input_index].nnz - - self.create_input_parameter(input_index, psize, dims, sparse, - format) - self.create_bias_parameter(bias, self.config.size) - - -@config_layer('print') -class PrintLayer(LayerBase): - def __init__(self, name, inputs, format=None): - super(PrintLayer, self).__init__(name, 'print', 0, inputs) - if format is None: - format = "\n".join([ - "layer=" + input.input_layer_name + " %s" - for input in self.inputs - ]) - self.config.user_arg = format - - -@config_layer('priorbox') -class PriorBoxLayer(LayerBase): - def __init__(self, name, inputs, size, min_size, max_size, aspect_ratio, - variance): - super(PriorBoxLayer, self).__init__(name, 'priorbox', 0, inputs) - config_assert(len(inputs) == 2, 'PriorBoxLayer must have 2 inputs') - input_layer = self.get_input_layer(1) - config_assert( - input_layer.type == 'data', - 'Expecting the second input layer of a priorbox layer to be ' - 'a data layer') - config_assert(input_layer.width > 0, 'The data layer must set width') - config_assert(input_layer.height > 0, 'The data layer must set height') - config_assert(len(variance) == 4, 'The variance must have 4 elements') - self.config.inputs[0].priorbox_conf.min_size.extend(min_size) - self.config.inputs[0].priorbox_conf.max_size.extend(max_size) - self.config.inputs[0].priorbox_conf.aspect_ratio.extend(aspect_ratio) - self.config.inputs[0].priorbox_conf.variance.extend(variance) - self.config.size = size - - -@config_layer('multibox_loss') -class MultiBoxLossLayer(LayerBase): - def __init__(self, name, inputs, input_num, num_classes, overlap_threshold, - neg_pos_ratio, neg_overlap, background_id, **xargs): - super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0, - inputs) - config_assert( - len(inputs) == (input_num * 2 + 2), - 'MultiBoxLossLayer does not have enough inputs') - config_assert(num_classes > background_id, - 'The number of classes must be greater than the background ID') - self.config.inputs[0].multibox_loss_conf.num_classes = num_classes - self.config.inputs[ -
0].multibox_loss_conf.overlap_threshold = overlap_threshold - self.config.inputs[0].multibox_loss_conf.neg_pos_ratio = neg_pos_ratio - self.config.inputs[0].multibox_loss_conf.neg_overlap = neg_overlap - self.config.inputs[0].multibox_loss_conf.background_id = background_id - self.config.inputs[0].multibox_loss_conf.input_num = input_num - self.config.size = 1 - - -@config_layer('detection_output') -class DetectionOutputLayer(LayerBase): - def __init__(self, name, inputs, size, input_num, num_classes, - nms_threshold, nms_top_k, keep_top_k, confidence_threshold, - background_id, **xargs): - super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0, - inputs) - config_assert( - len(inputs) == (input_num * 2 + 1), - 'DetectionOutputLayer does not have enough inputs') - config_assert(num_classes > background_id, - 'The number of classes must be greater than the background ID') - self.config.inputs[0].detection_output_conf.num_classes = num_classes - self.config.inputs[ - 0].detection_output_conf.nms_threshold = nms_threshold - self.config.inputs[0].detection_output_conf.nms_top_k = nms_top_k - self.config.inputs[0].detection_output_conf.keep_top_k = keep_top_k - self.config.inputs[ - 0].detection_output_conf.confidence_threshold = confidence_threshold - self.config.inputs[ - 0].detection_output_conf.background_id = background_id - self.config.inputs[0].detection_output_conf.input_num = input_num - self.config.size = size - - -@config_layer('roi_pool') -class ROIPoolLayer(LayerBase): - def __init__(self, name, inputs, pooled_width, pooled_height, spatial_scale, - num_channels, **xargs): - super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) - config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') - self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width - self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height - self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale - self.set_cnn_layer(name, pooled_height, pooled_width, num_channels) - - -@config_layer('data') -class DataLayer(LayerBase): - def __init__(self, - name, - size, - depth=None, - height=None, - width=None, - device=None): - super(DataLayer, self).__init__( - name, 'data', size, inputs=[], device=device) - if height and width: - self.set_layer_height_width(height, width) - if depth: - self.set_layer_depth(depth) - - -''' -DataNormLayer: A layer for data normalization -Input: One and only one input layer is accepted. The input layer must - be a DataLayer with the dense data type -Output: The normalization of the input data - -Reference: - LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine - -Example: - Layer( - name = "norm_input_layer", - type = "data_norm", - inputs = [Input("input_layer", - parameter_name = "_slot0.stats")], - data_norm_strategy = "z-score", - ) - -Note: - (1) The parameter has been calculated in the preprocessing stage, - and should be initialized by --init_model_path when training.
- (2) Three data normalization methods are considered - z-score: y = (x-mean)/std - min-max: y = (x-min)/(max-min) - decimal-scaling: y = x/10^j, where j is the smallest integer such that max(|y|)<1 -''' - - -@config_layer('data_norm') -class DataNormLayer(LayerBase): - def __init__(self, name, inputs, data_norm_strategy="z-score", device=None): - super(DataNormLayer, self).__init__( - name, 'data_norm', 0, inputs=inputs, device=device) - self.config.data_norm_strategy = data_norm_strategy - config_assert(len(inputs) == 1, 'DataNormLayer must have 1 input') - input_layer = self.get_input_layer(0) - self.set_layer_size(input_layer.size) - para_size = 5 * input_layer.size - para_dims = [5, input_layer.size] - self.inputs[0].is_static = True - self.create_input_parameter(0, para_size, para_dims) - - -@config_layer('prelu') -class ParameterReluLayer(LayerBase): - layer_type = 'prelu' - - def __init__(self, name, inputs, partial_sum=1, **args): - super(ParameterReluLayer, self).__init__( - name, self.layer_type, 0, inputs=inputs, **args) - - input_layer = self.get_input_layer(0) - config_assert(len(self.inputs) == 1, "prelu layer takes one and only one input.") - config_assert(input_layer.size % partial_sum == 0, - "partial_sum must exactly divide the input size") - - dims = [1, input_layer.size / partial_sum] - self.set_layer_size(input_layer.size) - self.config.partial_sum = partial_sum - self.create_input_parameter(0, input_layer.size / partial_sum, dims) - - self.set_layer_height_width(self.get_input_layer(0).height, \ - self.get_input_layer(0).width) - self.set_layer_depth(self.get_input_layer(0).depth) - - -@config_layer('conv') -class ConvLayerBase(LayerBase): - layer_type = 'conv' - - def __init__(self, - name, - inputs=[], - bias=True, - num_filters=None, - shared_biases=False, - **xargs): - super(ConvLayerBase, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if num_filters is not None: - self.config.num_filters = num_filters - - use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0)) - use_gpu = int(g_command_config_args.get("use_gpu", 0)) - parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - - # Automatically select cudnn_conv for GPU, exconv for CPU - # and mkldnn_conv for MKLDNN - # if type=conv is set, while still allowing the user to specify - # exconv, mkldnn_conv or cudnn_conv manually.
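# [Editor's note] A self-contained sketch of the selection rule described in
# the comment above; the helper name `select_conv_type` and its plain
# arguments are hypothetical stand-ins for the g_command_config_args lookups
# and self.config.device used in the code that follows:
#
#     def select_conv_type(requested, use_gpu, use_mkldnn, parallel_nn, device):
#         if requested == "cudnn_conv":
#             assert use_gpu, "cudnn_conv only supports GPU"
#         if requested == "mkldnn_conv":
#             assert use_mkldnn, "mkldnn_conv only supports MKLDNN"
#         if (use_gpu and requested not in ("exconv", "mkldnn_conv") and
#                 (not parallel_nn or device > -1)):
#             return "cudnn_conv"  # the GPU path prefers the cudnn kernel
#         return "mkldnn_conv" if use_mkldnn else "exconv"  # CPU fallback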
- if self.layer_type == "cudnn_conv": - config_assert(use_gpu, "cudnn_conv only support GPU") - - if self.layer_type == "mkldnn_conv": - config_assert(use_mkldnn, "mkldnn_conv only support MKLDNN") - - if (use_gpu == 1 and self.layer_type != "exconv" and - self.layer_type != "mkldnn_conv" and - (parallel_nn == 0 or self.config.device > -1)): - self.layer_type = "cudnn_conv" - else: - self.layer_type = "mkldnn_conv" if use_mkldnn else "exconv" - # need to specify layer in config - self.config.type = self.layer_type - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - conv_conf = self.config.inputs[input_index].conv_conf - parse_conv(self.inputs[input_index].conv, input_layer.name, - conv_conf, num_filters) - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.output_y, conv_conf.output_x, - self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) - - -@config_layer('exconv') -class ConvLayer(ConvLayerBase): - layer_type = 'exconv' - - -@config_layer('mkldnn_conv') -class ConvLayer(ConvLayerBase): - layer_type = 'mkldnn_conv' - - -@config_layer('cudnn_conv') -class ConvLayer(ConvLayerBase): - layer_type = 'cudnn_conv' - - -@config_layer('convt') -class ConvTransLayerBase(LayerBase): - layer_type = 'convt' - - def __init__(self, - name, - inputs=[], - bias=True, - num_filters=None, - shared_biases=False, - **xargs): - super(ConvTransLayerBase, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if num_filters is not None: - self.config.num_filters = num_filters - - use_gpu = int(g_command_config_args.get("use_gpu", 0)) - parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - - # Automatically select cudnn_type for GPU and exconvt for CPU - # if set type=exconvt, but still reserve the way user specify - # exconvt or cudnn_convt manually. 
- if self.layer_type == "cudnn_convt": - config_assert(use_gpu, "cudnn_convt only support GPU") - - if (use_gpu == 1 and self.layer_type != "exconvt" and - (parallel_nn == 0 or self.config.device > -1)): - self.layer_type = "cudnn_convt" - else: - self.layer_type = "exconvt" - # need to specify layer in config - self.config.type = self.layer_type - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - parse_conv( - self.inputs[input_index].conv, - input_layer.name, - self.config.inputs[input_index].conv_conf, - num_filters, - trans=True) - conv_conf = self.config.inputs[input_index].conv_conf - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size, - self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return conv_conf.channels * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y) - - -@config_layer('exconvt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'exconvt' - - -@config_layer('cudnn_convt') -class ConvTransLayer(ConvTransLayerBase): - layer_type = 'cudnn_convt' - - -@config_layer('conv_3d') -class Conv3DLayerBase(LayerBase): - def __init__(self, - name, - inputs=[], - bias=True, - num_filters=None, - shared_biases=True, - **xargs): - super(Conv3DLayerBase, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if num_filters is not None: - self.config.num_filters = num_filters - - # need to specify layer in config - self.config.type = self.layer_type - - trans = False - if self.config.type == "deconv3d": - trans = True - - if shared_biases is not None: - self.config.shared_biases = shared_biases - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - conv_conf = self.config.inputs[input_index].conv_conf - parse_conv3d( - self.inputs[input_index].conv, - input_layer.name, - conv_conf, - num_filters, - trans=trans - ) # for z-axis pad:0, strid:1, filter_size:1, img_size:1 - psize = self.calc_parameter_size(conv_conf) - self.create_input_parameter(input_index, psize) - if trans: - self.set_cnn_layer(name, conv_conf.img_size_z, - conv_conf.img_size_y, conv_conf.img_size, - self.config.num_filters) - else: - self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y, - conv_conf.output_x, self.config.num_filters) - - psize = self.config.size - if shared_biases: - psize = self.config.num_filters - self.create_bias_parameter(bias, psize, [psize, 1]) - - def calc_parameter_size(self, conv_conf): - return self.config.num_filters * conv_conf.filter_channels \ - * (conv_conf.filter_size * conv_conf.filter_size_y \ - * conv_conf.filter_size_z) - - def set_cnn_layer(self, - input_layer_name, - depth, - height, - width, - channels, - is_print=True): - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - self.set_layer_depth(depth) - if is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) - - -@config_layer('conv3d') -class Conv3DLayer(Conv3DLayerBase): - layer_type = 'conv3d' - - -@config_layer('deconv3d') -class Conv3DLayer(Conv3DLayerBase): - layer_type = 
'deconv3d' - - -@config_layer('norm') -class NormLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(NormLayer, self).__init__(name, 'norm', 0, inputs=inputs, **xargs) - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - use_mkldnn = True if use_mkldnn and self.inputs[ - 0].norm.norm_type == 'cmrnorm-projection' else False - self.config.type = 'mkldnn_lrn' if use_mkldnn else self.config.type - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - norm_conf = self.config.inputs[input_index].norm_conf - parse_norm(self.inputs[input_index].norm, input_layer.name, - norm_conf) - norm_conf.scale = self.inputs[ - input_index].norm.scale if use_mkldnn else norm_conf.scale - self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x, - norm_conf.channels, False) - if norm_conf.norm_type == "cross-channel-norm": - self.create_input_parameter(0, norm_conf.channels, - [norm_conf.channels, 1]) - - -@config_layer('pool') -class PoolLayer(LayerBase): - layer_type = 'pool' - - def __init__(self, name, inputs, ceil_mode=True, exclude_mode=None, - **xargs): - use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0)) - if self.layer_type == "mkldnn_pool": - config_assert(use_mkldnn, "mkldnn_pool only support MKLDNN") - self.layer_type = 'mkldnn_pool' if use_mkldnn else 'pool' - super(PoolLayer, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - pool_conf = self.config.inputs[input_index].pool_conf - parse_pool(self.inputs[input_index].pool, input_layer.name, - pool_conf, ceil_mode, exclude_mode) - self.set_cnn_layer(name, pool_conf.output_y, pool_conf.output_x, - pool_conf.channels) - - -@config_layer('mkldnn_pool') -class MKLDNNPoolLayer(PoolLayer): - layer_type = 'mkldnn_pool' - - -@config_layer('pool3d') -class Pool3DLayer(LayerBase): - def __init__(self, name, inputs, ceil_mode=True, **xargs): - super(Pool3DLayer, self).__init__( - name, 'pool3d', 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - pool_conf = self.config.inputs[input_index].pool_conf - parse_pool3d(self.inputs[input_index].pool, input_layer.name, - pool_conf, ceil_mode) - self.set_cnn_layer(name, pool_conf.output_z, pool_conf.output_y, - pool_conf.output_x, pool_conf.channels) - - def set_cnn_layer(self, - input_layer_name, - depth, - height, - width, - channels, - is_print=True): - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - self.set_layer_depth(depth) - if is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) - - -@config_layer('spp') -class SpatialPyramidPoolLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(SpatialPyramidPoolLayer, self).__init__( - name, 'spp', 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - spp_conf = self.config.inputs[input_index].spp_conf - parse_spp(self.inputs[input_index].spp, input_layer.name, spp_conf) - output_x = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1) - self.set_cnn_layer(name, 1, output_x, spp_conf.image_conf.channels) - - -@config_layer('upsample') -class UpsampleLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(UpsampleLayer, self).__init__( - 
name, 'upsample', 0, inputs=inputs, **xargs) - - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].upsample_conf.image_conf - image_conf.img_size = input_layer.width - image_conf.img_size_y = input_layer.height - image_conf.channels = input_layer.size / (input_layer.width * - input_layer.height) - - upsample = self.inputs[0].upsample - output_x = 0 - output_y = 0 - output_size = 0 - - if upsample.scale: - self.config.inputs[0].upsample_conf.scale = upsample.scale - self.config.inputs[0].upsample_conf.scale_y = upsample.scale_y - output_x = input_layer.width * upsample.scale - output_y = input_layer.height * upsample.scale_y - self.config.inputs[0].upsample_conf.pad_out_x = upsample.pad_out_x - self.config.inputs[0].upsample_conf.pad_out_y = upsample.pad_out_y - if upsample.upsample_size: - self.config.inputs[ - 0].upsample_conf.upsample_size = upsample.upsample_size - self.config.inputs[ - 0].upsample_conf.upsample_size_y = upsample.upsample_size_y - output_x = upsample.upsample_size - output_y = upsample.upsample_size_y - - output_size = image_conf.channels * output_x * output_y - - self.set_layer_height_width(output_y, output_x) - self.set_layer_depth(input_layer.depth) - self.set_layer_size(output_size) - - -@config_layer('pad') -class PadLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(PadLayer, self).__init__(name, 'pad', 0, inputs=inputs, **xargs) - pad = self.inputs[0].pad - self.config.inputs[0].pad_conf.pad_c.extend(pad.pad_c) - self.config.inputs[0].pad_conf.pad_h.extend(pad.pad_h) - self.config.inputs[0].pad_conf.pad_w.extend(pad.pad_w) - - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].pad_conf.image_conf - parse_image(pad, input_layer.name, image_conf) - out_ch = pad.channels + pad.pad_c[0] + pad.pad_c[1] - out_h = image_conf.img_size_y + pad.pad_h[0] + pad.pad_h[1] - out_w = image_conf.img_size + pad.pad_w[0] + pad.pad_w[1] - self.set_cnn_layer(name, out_h, out_w, out_ch) - self.config.size = out_ch * out_h * out_w - - -@config_layer('crop') -class CropLayer(LayerBase): - def __init__(self, name, inputs, axis, offset, shape, **xargs): - super(CropLayer, self).__init__(name, 'crop', 0, inputs=inputs, **xargs) - self.config.axis = axis - self.config.offset.extend(offset) - self.config.shape.extend(shape) - - # get channel, width and height from input_0 layer - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].image_conf - image_conf.img_size = input_layer.width - image_conf.img_size_y = input_layer.height - image_conf.channels = input_layer.size / (input_layer.width * - input_layer.height) - # only support for 4-dims inputs and NCHW order - if (len(self.config.inputs) == 2): - self.set_layer_height_width( - self.get_input_layer(1).height, self.get_input_layer(1).width) - self.set_layer_size(self.get_input_layer(1).size) - else: - self.set_layer_height_width(shape[-2], shape[-1]) - self.set_layer_size(reduce(lambda x, y: x * y, shape[1:])) - - -@config_layer('batch_norm') -class BatchNormLayer(LayerBase): - layer_type = 'batch_norm' - - def __init__(self, - name, - inputs, - bias=True, - img3D=False, - use_global_stats=True, - epsilon=1e-5, - moving_average_fraction=0.9, - batch_norm_type=None, - mean_var_names=None, - **xargs): - if inputs is None: - inputs = [] - elif not isinstance(inputs, list): - inputs = [inputs] - config_assert( - len(inputs) == 1, "BatchNormLayer must have one and only one input") - # Create Input for moving mean and std, - # in batch normalization layer. 
- # These parameters need no updates, so is_static is set to true. - # Without is_static, even with learning_rate = 0 and decay_rate = 0, - # these parameters would still change if average_window is set in the config. - use_gpu = bool(int(g_command_config_args.get("use_gpu", 0))) - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - is_shared = not use_gpu - for i in xrange(2): - inputs.append( - Input( - inputs[0].input_layer_name, - initial_std=0.0, - initial_mean=0.0, - is_static=True, - is_shared=is_shared, - make_layer_name_in_submodel=False, )) - - parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0))) - cudnn_version = int(g_command_config_args.get("cudnn_version", 0)) - # Automatically select cudnn_batch_norm for GPU, batch_norm for CPU - # and mkldnn_batch_norm for MKLDNN. The choice also depends on the cudnn version. - if batch_norm_type == "mkldnn_batch_norm": - config_assert(use_mkldnn, "mkldnn_batch_norm only supports MKLDNN") - use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \ - not use_mkldnn and batch_norm_type != "mkldnn_batch_norm" and \ - ((not parallel_nn) or self.config.device > -1) - if use_cudnn: - self.layer_type = "cudnn_batch_norm" - else: - self.layer_type = "mkldnn_batch_norm" if use_mkldnn else "batch_norm" - super(BatchNormLayer, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - - if use_global_stats is not None: - self.config.use_global_stats = use_global_stats - if moving_average_fraction is not None: - self.config.moving_average_fraction = moving_average_fraction - if epsilon is not None: - assert epsilon >= 1e-5, "epsilon must be no less than 1e-5." - self.config.epsilon = epsilon - - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].image_conf - if img3D: - parse_image3d(self.inputs[0].image, input_layer.name, image_conf) - # Only pass the width and height of the input to the batch_norm layer - # when either of them is non-zero. - if input_layer.width != 0 or input_layer.height != 0: - self.set_cnn_layer( - input_layer_name=name, - depth=image_conf.img_size_z, - height=image_conf.img_size_y, - width=image_conf.img_size, - channels=image_conf.channels, - is_print=True) - else: - self.set_layer_size(input_layer.size) - else: - parse_image(self.inputs[0].image, input_layer.name, image_conf) - # Only pass the width and height of the input to the batch_norm layer - # when either of them is non-zero.
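# [Editor's note] A sketch of the parameter layout this class ends up with,
# for an input of C channels (calc_parameter_size() below returns C):
# input 0 carries the C learned per-channel scales, inputs 1 and 2 carry the
# static moving mean and variance appended above (dims [1, C],
# is_static=True), and the bias parameter carries the C learned shifts.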
- if input_layer.width != 0 or input_layer.height != 0: - self.set_cnn_layer( - input_layer_name=name, - height=image_conf.img_size_y, - width=image_conf.img_size, - channels=image_conf.channels, - is_print=True) - else: - self.set_layer_size(input_layer.size) - - psize = self.calc_parameter_size(image_conf) - dims = [1, psize] - if mean_var_names is not None: - assert len(mean_var_names) == 2 - self.inputs[1].parameter_name = mean_var_names[0] - self.inputs[2].parameter_name = mean_var_names[1] - - self.create_input_parameter(0, psize) - self.create_input_parameter(1, psize, dims) - self.create_input_parameter(2, psize, dims) - - self.create_bias_parameter(bias, psize) - - def set_cnn_layer(self, - input_layer_name, - depth=None, - height=None, - width=None, - channels=None, - is_print=True): - depthIsNone = False - if depth is None: - depth = 1 - depthIsNone = True - size = depth * height * width * channels - self.set_layer_size(size) - self.set_layer_height_width(height, width) - self.set_layer_depth(depth) - if is_print and depthIsNone: - print("output for %s: c = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, height, width, size)) - elif is_print: - print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % - (input_layer_name, channels, depth, height, width, size)) - - def calc_parameter_size(self, image_conf): - return image_conf.channels - - -@config_layer('trans') -class TransLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(TransLayer, self).__init__( - name, 'trans', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'TransLayer must have one and only one input') - self.set_layer_size(self.get_input_layer(0).size) - - -@config_layer('resize') -class ResizeLayer(LayerBase): - def __init__(self, name, size, inputs, **xargs): - super(ResizeLayer, self).__init__( - name, 'resize', size=size, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'ResizeLayer must have one and only one input') - - -@config_layer('rotate') -class RotateLayer(LayerBase): - def __init__(self, name, inputs, height, width, device=None): - super(RotateLayer, self).__init__( - name, 'rotate', 0, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 1, - 'RotateLayer must have one and only one input') - self.set_layer_height_width(height, width) - self.set_layer_size(self.get_input_layer(0).size) - - -@config_layer('blockexpand') -class BlockExpandLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(BlockExpandLayer, self).__init__( - name, 'blockexpand', 0, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - parse_block_expand( - self.inputs[input_index].block_expand, input_layer.name, - self.config.inputs[input_index].block_expand_conf) - block_expand_conf = self.config.inputs[ - input_index].block_expand_conf - self.set_layer_size(block_expand_conf.block_x * - block_expand_conf.block_y * - block_expand_conf.channels) - - -@config_layer('maxout') -class MaxOutLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(MaxOutLayer, self).__init__( - name, 'maxout', 0, inputs=inputs, **xargs) - input_layer = self.get_input_layer(0) - maxout_conf = self.config.inputs[0].maxout_conf - parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf) - out_channels = maxout_conf.image_conf.channels / maxout_conf.groups - self.set_cnn_layer(name, maxout_conf.image_conf.img_size_y, - maxout_conf.image_conf.img_size, 
out_channels) - - -@config_layer('row_conv') -class RowConvLayer(LayerBase): - def __init__(self, name, inputs, context_length, **xargs): - super(RowConvLayer, self).__init__( - name, 'row_conv', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'row convolution layer must have one and only one input.') - input_layer = self.get_input_layer(0) - row_conv_conf = self.config.inputs[0].row_conv_conf - row_conv_conf.context_length = context_length - self.set_layer_size(input_layer.size) - psize = context_length * input_layer.size - dims = [context_length, input_layer.size] - self.create_input_parameter(0, psize, dims) - - -@config_layer('clip') -class ClipLayer(LayerBase): - def __init__(self, name, inputs, min, max, **xargs): - super(ClipLayer, self).__init__(name, 'clip', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'ClipLayer must have one and only one input.') - config_assert(min < max, 'min must be less than max.') - input_layer = self.get_input_layer(0) - self.set_layer_size(input_layer.size) - self.config.inputs[0].clip_conf.min = min - self.config.inputs[0].clip_conf.max = max - - -@config_layer('scale_shift') -class ScaleShiftLayer(LayerBase): - def __init__(self, name, inputs, bias=True, **xargs): - super(ScaleShiftLayer, self).__init__( - name, 'scale_shift', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, - 'ScaleShiftLayer must have one and only one input.') - input_layer = self.get_input_layer(0) - self.set_layer_size(input_layer.size) - self.create_input_parameter(0, 1, [1, 1]) - self.create_bias_parameter(bias, 1) - - -# key: cost type -# value: cost class -g_cost_map = {} - - -# define a cost layer without any parameters -def define_cost(class_name, cost_type): - def init(cls, name, inputs, device=None, coeff=1.): - super(type(cls), cls).__init__( - name, cost_type, 1, inputs, device=device, coeff=coeff) - - cls = type(class_name, (LayerBase, ), dict(__init__=init)) - global g_cost_map - g_cost_map[cost_type] = cls - - -define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') -define_cost('CrossEntropyOverBeamCostLayer', 'cross_entropy_over_beam') -define_cost('RankingCost', 'rank-cost') -define_cost('AucValidation', 'auc-validation') -define_cost('PnpairValidation', 'pnpair-validation') -define_cost('SumOfSquaresCostLayer', 'square_error') -define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy') -define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy') -define_cost('HuberTwoClassification', 'huber_classification') -define_cost('SumCost', 'sum_cost') -define_cost('SmoothL1Cost', 'smooth_l1') - - -@config_layer('hsigmoid') -class HierarchicalSigmoidLayer(LayerBase): - def __init__(self, name, num_classes, inputs, device=None, bias=True): - super(HierarchicalSigmoidLayer, self).__init__( - name, 'hsigmoid', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) >= 2, - 'HierarchicalSigmoidLayer must have at least 2 inputs') - self.config.num_classes = num_classes - for input_index in xrange(len(self.inputs) - 1): - input_layer = self.get_input_layer(input_index) - psize = (num_classes - 1) * input_layer.size - dims = [num_classes - 1, input_layer.size] - self.create_input_parameter(input_index, psize, dims) - self.create_bias_parameter(bias, num_classes - 1) - - -''' -lambdaCost for lambdaRank LTR approach - -Usage: - Example: Layer(name = "cost", type = "lambda_cost", NDCG_num = 8, - max_sort_size = -1, inputs = ["output", "score"]) - - Input 
data: Samples of the same query should be loaded as a sequence, - by PyDataProvider etc.. User should provide - scores for each sample. The score slot should be the 2nd - input of lambdaRank layer. - - NDCG_num = the size of NDCG, e.g., 5 for NDCG@5. - Note: NDCG_num must be less than or equal to the minimum - size of lists. - - max_sort_size = the size of partial sorting in calculating gradient. - Note: If max_sort_size = -1, then for each list, the algorithm will - sort the entire list to get gradient. - In other cases, max_sort_size must be greater than or equal - to NDCG_num. - max_sort_size can be greater than the size of a list, in which - case the algorithm will sort the entire list to get gradient. -''' - - -@config_layer('lambda_cost') -class LambdaCost(LayerBase): - def __init__(self, name, inputs, NDCG_num=5, max_sort_size=-1, device=None): - super(LambdaCost, self).__init__( - name, 'lambda_cost', 1, inputs=inputs, device=device) - config_assert(len(self.inputs) == 2, 'lambdaCost must have 2 inputs') - self.config.NDCG_num = NDCG_num - if max_sort_size != -1: - config_assert( - NDCG_num <= max_sort_size, - 'NDCG_num must be less than or equal to max_sort_size') - self.config.max_sort_size = max_sort_size - - -@config_layer('huber_regression') -class HuberRegressionLoss(LayerBase): - def __init__(self, name, inputs, delta=1., coeff=1., device=None): - super(HuberRegressionLoss, self).__init__( - name, 'huber_regression', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 2, 'HuberRegression must have 2 inputs') - self.config.delta = delta - self.config.coeff = coeff - - -@config_layer('nce') -class NCELayer(LayerBase): - def __init__(self, - name, - num_classes, - inputs, - num_neg_samples=10, - neg_sampling_dist=None, - bias=True, - **xargs): - super(NCELayer, self).__init__(name, 'nce', 1, inputs=inputs, **xargs) - config_assert( - len(self.inputs) >= 2, 'NCELayer must have at least 2 inputs') - self.config.num_classes = num_classes - if neg_sampling_dist is not None: - config_assert( - len(neg_sampling_dist) == num_classes, - 'len(neg_sampling_dist)(%s) is not same as num_classes (%s)' % - (len(neg_sampling_dist), num_classes)) - s = sum(neg_sampling_dist) - config_assert( - abs(s - 1) < 1e-5, - 'The sum of neg_sampling_dist (%s) is not 1' % s) - - self.config.neg_sampling_dist.extend(neg_sampling_dist) - - self.config.num_neg_samples = num_neg_samples - num_real_inputs = len(self.inputs) - 1 - input_layer = self.get_input_layer(num_real_inputs) - config_assert(input_layer.type == 'data', - 'Expecting the last input layer of an nce layer to be ' - 'a data layer') - - if (num_real_inputs > 1 and input_layer.size == 1 and - self.get_input_layer(num_real_inputs - 1).type == 'data'): - # This input layer is assumed to be a sample weight layer - num_real_inputs -= 1 - - for input_index in xrange(num_real_inputs): - input_layer = self.get_input_layer(input_index) - psize = num_classes * input_layer.size - dims = [num_classes, input_layer.size] - self.create_input_parameter(input_index, psize, dims) - self.create_bias_parameter(bias, num_classes) - - -@config_layer('addto') -class AddToLayer(LayerBase): - layer_type = 'addto' - - def __init__(self, name, inputs, bias=True, **xargs): - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - if self.layer_type == "mkldnn_addto": - config_assert(use_mkldnn, "mkldnn_addto only support MKLDNN") - self.layer_type = 'mkldnn_addto' if use_mkldnn else 'addto' - super(AddToLayer, self).__init__( - name, 
self.layer_type, 0, inputs=inputs, **xargs) - config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer') - - layer_size = self.get_input_layer(0).size - # To preserve height, width and depth. - layer_with_hwc = self.get_input_layer(0) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - assert layer_size == input_layer.size - if input_layer.height and input_layer.width and input_layer.depth: - layer_with_hwc = input_layer - - self.set_layer_size(layer_with_hwc.size) - self.set_layer_height_width(layer_with_hwc.height, layer_with_hwc.width) - self.set_layer_depth(layer_with_hwc.depth) - self.create_bias_parameter(bias, self.config.size) - - -@config_layer('mkldnn_addto') -class MKLDNNAddtoLayer(AddToLayer): - layer_type = 'mkldnn_addto' - - -@config_layer('agent') -class AgentLayer(LayerBase): - def __init__(self, name, size, device=None): - super(AgentLayer, self).__init__( - name, 'agent', size, inputs=[], device=device) - - -@config_layer('gather_agent') -class GatherAgentLayer(LayerBase): - def __init__(self, name, size, device=None): - super(GatherAgentLayer, self).__init__( - name, 'gather_agent', size, inputs=[], device=device) - - -@config_layer('scatter_agent') -class ScatterAgentLayer(LayerBase): - def __init__(self, name, size, width=None, height=None, device=None): - super(ScatterAgentLayer, self).__init__( - name, 'scatter_agent', size, inputs=[], device=device) - if height and width: - self.set_layer_height_width(height, width) - - -@config_layer('multiplex') -class MultiplexLayer(LayerBase): - def __init__(self, name, inputs, size, device=None): - super(MultiplexLayer, self).__init__( - name, 'multiplex', size, inputs=inputs, device=device) - config_assert( - len(inputs) > 2, 'MultiplexLayer should have more than 2 inputs.') - for i in range(1, len(inputs)): - config_assert( - self.get_input_layer(i).size == size, - "All the input layers except the first one should " - "have the same size as the MultiplexLayer.") - - -@config_func -def Link(name, has_subseq=False): - """ - Still keeping has_subseq for backward compatibility - """ - link_config = LinkConfig() - link_config.link_name = name - return link_config - - -# memory for a recurrent layer group. -# *name* and *size* are the actual layer's name and size. -# If *name* is None, *memory_name* must be provided, and SetMemoryInput() -# must be used later to specify the layer which this memory remembers. -# -# returns the name of the memory; -# use this name when assigning the memory as another layer's input -# -# the boot frame of the memory is zeroed by default, -# or initialized by the boot layer's output if *boot_layer* is set, -# or initialized by a trainable bias if *boot_bias* is set, -# or initialized by a constant id if *boot_with_const_id* is set -# -# Memory can be a sequence if *is_sequence* is set; this type of memory -# can only be initialized by a *boot_layer* which is a sequence.
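# [Editor's sketch] Typical use inside a recurrent layer group; the layer
# names here are hypothetical. The returned agent name is what downstream
# layers consume:
#
#     out_mem = Memory(name="rnn_out", size=128, boot_layer="rnn_boot")
#     Layer(name="rnn_out", type="fc", size=128,
#           inputs=["rnn_in", out_mem])
#
# SetMemoryInput() (defined below) covers the case where *name* is None.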
-# -@config_func -def Memory(name, - size, - is_sequence=False, - boot_layer=None, - boot_bias=False, - boot_bias_active_type="", - boot_with_const_id=None, - memory_name=None): - if not memory_name: - config_assert(name is not None, "name cannot be None") - memory_name = name + "+delay1" - agent_name = memory_name - agent_layer = AgentLayer(agent_name, size) - config_assert(g_current_submodel.is_recurrent_layer_group, - 'Memory should be used in recurrent layer group only') - memory = g_current_submodel.memories.add() - if name is not None: - memory.layer_name = MakeLayerNameInSubmodel(name) - memory.link_name = MakeLayerNameInSubmodel(agent_name) - options = sum((boot_layer is not None, bool(boot_bias), - boot_with_const_id is not None)) - config_assert( - options <= 1, - 'take one option at most from boot_layer, boot_bias, or boot_with_const_id' - ) - if boot_layer is not None: - boot_layer = MakeLayerNameInParentSubmodel(boot_layer) - config_assert(boot_layer in g_layer_map, - 'boot_layer "%s" does not correspond to a layer name' % - boot_layer) - memory.boot_layer_name = boot_layer - elif boot_bias: - memory.boot_bias_parameter_name = agent_layer.create_bias_parameter( - boot_bias, size, for_self=False) - memory.boot_bias_active_type = boot_bias_active_type - elif boot_with_const_id is not None: - memory.boot_with_const_id = boot_with_const_id - return agent_name - - -@config_func -def SetMemoryInput(memory_name, layer_name): - memory_name = MakeLayerNameInSubmodel(memory_name) - layer_name = MakeLayerNameInSubmodel(layer_name) - for mem in g_current_submodel.memories: - if mem.link_name == memory_name: - mem.layer_name = layer_name - return - logger.fatal("Nonexistent memory name: " + memory_name) - - -# Generator for a recurrent layer group. To use it: -# 1. define an id layer as the output of the layer group -# 2. define a memory of this id layer, and assign a boot id (beginning of sequence) -# 3. define an eos-check layer and fill its name in the generator's *eos_layer_name* -# Sequence generation stops when the eos check returns 1 or *max_num_frames* is reached. -# If *beam_size* is greater than one, the generator will use beam search. -# In beam search, if *num_results_per_sample* is set, one sample sequence can output -# multiple results, each with a probability.
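# [Editor's sketch] Wiring the generator described above (layer names are
# hypothetical):
#
#     Generator(max_num_frames=100, eos_layer_name="eos_check",
#               beam_size=5, num_results_per_sample=3)
#
# With beam_size > 1 the layer group runs beam search and can emit up to
# num_results_per_sample sequences per sample, each with a probability.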
-@config_func -def Generator( - max_num_frames, - eos_layer_name="eos_check", - num_results_per_sample=1, - beam_size=1, - log_prob=None, ): - generator_config = GeneratorConfig() - generator_config.max_num_frames = max_num_frames - generator_config.eos_layer_name = eos_layer_name - generator_config.num_results_per_sample = num_results_per_sample - generator_config.beam_size = beam_size - if log_prob is not None: - generator_config.log_prob = log_prob - return generator_config - - -@config_layer('expand') -class ExpandLayer(LayerBase): - def __init__(self, name, inputs, trans_type='non-seq', bias=False, **xargs): - super(ExpandLayer, self).__init__( - name, 'expand', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 2, 'ExpandLayer takes 2 and only 2 inputs') - self.config.trans_type = trans_type - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - self.set_layer_size(self.get_input_layer(0).size) - self.create_bias_parameter(bias, self.config.size) - - -@config_layer('featmap_expand') -class FeatMapExpandLayer(LayerBase): - def __init__(self, - name, - inputs, - num_filters=None, - as_row_vector=True, - bias=False, - **xargs): - super(FeatMapExpandLayer, self).__init__( - name, 'featmap_expand', 0, inputs=inputs, **xargs) - config_assert( - len(self.inputs) == 1, 'FeatMapExpandLayer takes 1 and only 1 input') - if num_filters is not None: - self.config.num_filters = num_filters - else: - logger.fatal("FeatMapExpandLayer must specify num_filters.") - if not as_row_vector: - self.config.user_arg = "as_col_vec" - self.set_layer_size(self.get_input_layer(0).size * num_filters) - - -@config_layer('max') -class MaxLayer(LayerBase): - def __init__(self, - name, - inputs, - trans_type='non-seq', - bias=False, - output_max_index=None, - stride=-1, - **xargs): - super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs) - config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') - if trans_type == 'seq': - config_assert(stride == -1, 'subseq does not support stride window') - self.config.trans_type = trans_type - self.config.seq_pool_stride = stride - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - self.set_layer_size(input_layer.size) - self.create_bias_parameter(bias, self.config.size) - if output_max_index is not None: - self.config.output_max_index = output_max_index - - -@config_layer('maxid') -class MaxIdLayer(LayerBase): - def __init__(self, name, inputs, beam_size=None, device=None): - super(MaxIdLayer, self).__init__( - name, 'maxid', 0, inputs=inputs, device=device) - config_assert(len(self.inputs) == 1, 'MaxIdLayer must have 1 input') - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - self.set_layer_size(input_layer.size) - - if beam_size is None: - global g_current_submodel - if g_current_submodel.HasField("generator"): - self.config.beam_size = g_current_submodel.generator.beam_size - else: - self.config.beam_size = beam_size - - -@config_layer('eos_id') -class EosIdLayer(LayerBase): - def __init__(self, name, inputs, eos_id, device=None): - super(EosIdLayer, self).__init__( - name, 'eos_id', 0, inputs=inputs, device=device) - config_assert(len(self.inputs) == 1, 'EosIdLayer must have 1 input') - self.set_layer_size(2) # boolean output - self.config.eos_id = eos_id - - -@config_layer('seqlastins') -class SequenceLastInstanceLayer(LayerBase): - def __init__(self, - name, - inputs, - trans_type='non-seq', -
bias=False, - stride=-1, - **xargs): - super(SequenceLastInstanceLayer, self).__init__( - name, 'seqlastins', 0, inputs=inputs, **xargs) - config_assert( - len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input') - if trans_type == 'seq': - config_assert(stride == -1, 'subseq does not support stride window') - self.config.trans_type = trans_type - self.config.seq_pool_stride = stride - self.set_layer_size(self.get_input_layer(0).size) - self.create_bias_parameter(bias, self.config.size) - - -@config_layer('seqfirstins') -class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): - def __init__(self, - name, - inputs, - trans_type='non-seq', - bias=False, - stride=-1, - **xargs): - super(SequenceFirstInstanceLayer, self).__init__( - name, - inputs=inputs, - trans_type=trans_type, - bias=bias, - stride=stride, - **xargs) - self.config.select_first = True - - -@config_layer('seqconcat') -class SequenceConcatLayer(LayerBase): - def __init__(self, name, inputs, bias=False, **xargs): - super(SequenceConcatLayer, self).__init__( - name, 'seqconcat', 0, inputs=inputs, **xargs) - config_assert( - len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs') - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - self.set_layer_size(input_layer.size) - self.create_bias_parameter(bias, self.config.size) - - -@config_layer('seqreshape') -class SequenceReshapeLayer(LayerBase): - def __init__(self, name, size, inputs, bias=False, **xargs): - super(SequenceReshapeLayer, self).__init__( - name, 'seqreshape', size, inputs=inputs, **xargs) - config_assert( - len(inputs) == 1, 'SequenceReshapeLayer must have 1 input') - self.set_layer_size(size) - self.create_bias_parameter(bias, size) - - -@config_layer('subseq') -class SubSequenceLayer(LayerBase): - def __init__(self, name, inputs, bias=False, **xargs): - super(SubSequenceLayer, self).__init__( - name, 'subseq', 0, inputs=inputs, **xargs) - config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs') - input_layer0 = self.get_input_layer(0) - size = input_layer0.size - self.set_layer_size(size) - self.create_bias_parameter(bias, size) - - -@config_layer('seq_slice') -class SeqSliceLayer(LayerBase): - def __init__(self, name, inputs, starts, ends, bias=False, **xargs): - if isinstance(inputs, list): - assert len(inputs) == 1, ('the first input of sequence slice layer ' - 'is a single sequence input.') - else: - inputs = [inputs] - - if starts is not None: - if isinstance(starts, list): - assert len(starts) == 1, ( - 'the start indices for sequence slice layer cannot ' - 'be a list having more than one element.') - starts = starts[0] - inputs.append(starts) - - if ends is not None: - if isinstance(ends, list): - assert len(ends) == 1, ( - 'the end indices for sequence slice layer cannot ' - 'be a list having more than one element.') - ends = ends[0] - inputs.append(ends) - assert len(inputs) >= 2, ( - 'the sequence slice layer has at least two inputs.') - - super(SeqSliceLayer, self).__init__( - name, 'seq_slice', 0, inputs=inputs, **xargs) - - input_layer0 = self.get_input_layer(0) - size = input_layer0.size - self.set_layer_size(size) - - if len(inputs) == 3: - assert ( - self.get_input_layer(1).size == self.get_input_layer(2).size), ( - 'If start and end indices are both given to the ' - 'sequence slice layer, they should have the same width.') - elif len(inputs) == 2: - self.config.select_first = (starts is not None) - - -@config_layer('sub_nested_seq') -class SubNestedSequenceLayer(LayerBase): -
def __init__(self, name, inputs, selected_indices, bias=False, **xargs): - if isinstance(inputs, list): - assert len(inputs) == 1, ('the first input of sub_nested_seq ' - 'layer is a single nested sequence.') - inputs = inputs[0] - if isinstance(selected_indices, list): - assert len(selected_indices) == 1, ( - 'the second input of ' - 'sub_nested_seq layer is a single layer which is a ' - 'set of selected indices.') - selected_indices = selected_indices[0] - - super(SubNestedSequenceLayer, self).__init__( - name, - 'sub_nested_seq', - 0, - inputs=[inputs, selected_indices], - **xargs) - input_layer0 = self.get_input_layer(0) - size = input_layer0.size - self.set_layer_size(size) - - -@config_layer('dot_prod') -class DotProdLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(DotProdLayer, self).__init__( - name, 'dot_prod', 0, inputs, device=device) - config_assert(len(inputs) == 2, 'DotProdLayer must have 2 inputs.') - config_assert( - self.get_input_layer(0).size == self.get_input_layer(1).size, - "Two inputs should have the same size.") - self.set_layer_size(1) - - -@config_layer('out_prod') -class OuterProdLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(OuterProdLayer, self).__init__( - name, 'out_prod', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'OuterProdLayer must have 2 inputs') - input_layer0 = self.get_input_layer(0) - input_layer1 = self.get_input_layer(1) - self.set_layer_size(input_layer0.size * input_layer1.size) - - -@config_layer('power') -class PowerLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(PowerLayer, self).__init__( - name, 'power', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'PowerLayer must have 2 inputs') - input_layer1 = self.get_input_layer(1) - self.set_layer_size(input_layer1.size) - input_layer0 = self.get_input_layer(0) - config_assert(1 == input_layer0.size, - 'The left input is the exponent and should be of size 1') - - -@config_layer('slope_intercept') -class SlopeInterceptLayer(LayerBase): - def __init__(self, name, inputs, slope=1.0, intercept=0.0, device=None): - super(SlopeInterceptLayer, self).__init__( - name, 'slope_intercept', 0, inputs=inputs, device=device) - self.config.slope = slope - self.config.intercept = intercept - config_assert(len(inputs) == 1, 'SlopeInterceptLayer must have 1 input') - input_layer0 = self.get_input_layer(0) - self.set_layer_size(input_layer0.size) - - -@config_layer('scaling') -class ScalingLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(ScalingLayer, self).__init__( - name, 'scaling', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'ScalingLayer must have 2 inputs') - input_layer1 = self.get_input_layer(1) - self.set_layer_size(input_layer1.size) - input_layer0 = self.get_input_layer(0) - config_assert(1 == input_layer0.size, - 'The left input should be of size 1') - - -@config_layer('conv_shift') -class ConvShiftLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(ConvShiftLayer, self).__init__( - name, 'conv_shift', 0, inputs=inputs, device=device) - config_assert(len(inputs) == 2, 'ConvShiftLayer must have 2 inputs') - input_layer0 = self.get_input_layer(0) - self.set_layer_size(input_layer0.size) - - -@config_layer('convex_comb') -class ConvexCombinationLayer(LayerBase): - def __init__(self, name, size, inputs, device=None): - super(ConvexCombinationLayer, self).__init__( - name, 'convex_comb', size, inputs=inputs, 
device=device) - config_assert( - len(self.inputs) == 2, 'ConvexCombinationLayer must have 2 inputs') - config_assert( - size * self.get_input_layer(0).size == self.get_input_layer(1).size, - 'Wrong input size for ConvexCombinationLayer') - self.set_layer_size(size) - - -@config_layer('interpolation') -class InterpolationLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(InterpolationLayer, self).__init__( - name, 'interpolation', 0, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 3, 'InterpolationLayer must have 3 inputs') - input_layer0 = self.get_input_layer(0) - input_layer1 = self.get_input_layer(1) - input_layer2 = self.get_input_layer(2) - self.set_layer_size(input_layer1.size) - config_assert(input_layer0.size == 1, 'weight should be of size 1') - config_assert(input_layer1.size == input_layer2.size, - 'the two vector inputs should be of the same size') - - -@config_layer('bilinear_interp') -class BilinearInterpLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(BilinearInterpLayer, self).__init__( - name, 'bilinear_interp', 0, inputs=inputs, **xargs) - input_layer = self.get_input_layer(0) - conf = self.config.inputs[0].bilinear_interp_conf - parse_bilinear(self.inputs[0].bilinear_interp, input_layer.name, conf) - self.set_cnn_layer(name, conf.out_size_y, conf.out_size_x, - conf.image_conf.channels) - - -@config_layer('sum_to_one_norm') -class SumToOneNormLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(SumToOneNormLayer, self).__init__( - name, 'sum_to_one_norm', 0, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 1, 'SumToOneNormLayer must have 1 input') - input_layer0 = self.get_input_layer(0) - self.set_layer_size(input_layer0.size) - - -@config_layer('row_l2_norm') -class RowL2NormLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(RowL2NormLayer, self).__init__( - name, 'row_l2_norm', 0, inputs=inputs, **xargs) - config_assert(len(self.inputs) == 1, 'RowL2NormLayer must have 1 input') - input_layer = self.get_input_layer(0) - self.set_layer_size(input_layer.size) - - -@config_layer('cos') -class CosSimLayer(LayerBase): - def __init__(self, name, inputs, cos_scale=1, device=None): - super(CosSimLayer, self).__init__( - name, 'cos', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 2, - 'The CosSimLayer expects two and only two inputs.') - config_assert( - self.get_input_layer(0).size == self.get_input_layer(1).size, - 'The two inputs of CosSimLayer must have the same dimensionality.') - self.config.cos_scale = cos_scale - - -@config_layer('cos_vm') -class CosSimVecMatLayer(LayerBase): - def __init__(self, name, size, inputs, cos_scale=1.0, device=None): - super(CosSimVecMatLayer, self).__init__( - name, 'cos_vm', size, inputs=inputs, device=device) - self.config.cos_scale = cos_scale - config_assert( - len(self.inputs) == 2, 'The CosSimVecMatLayer must have 2 inputs.') - config_assert( - size * self.get_input_layer(0).size == self.get_input_layer(1).size, - 'Wrong input size for CosSimVecMatLayer.') - - -@config_layer('l2_distance') -class L2DistanceLayer(LayerBase): - def __init__(self, name, inputs, device=None): - super(L2DistanceLayer, self).__init__( - name, 'l2_distance', 1, inputs=inputs, device=device) - config_assert( - len(self.inputs) == 2, ('The L2DistanceLayer must have ' - 'and only have 2 inputs.')) - config_assert( - self.get_input_layer(0).size == self.get_input_layer(1).size, - ('Two inputs of the 
L2DistanceLayer must have '
-             'the same dimensionality.'))
-
-
-@config_layer('sampling_id')
-class SamplingIdLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
-        super(SamplingIdLayer, self).__init__(
-            name, 'sampling_id', 0, inputs=inputs, device=device)
-        config_assert(
-            len(self.inputs) == 1, 'SamplingIdLayer must have 1 input')
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
-
-
-# AverageLayer: "average" for each sample within a sequence.
-# average_strategy: set to one of the following:
-#     'average': plain average.
-#     'sum': sum each sample instead of averaging (i.e., do not divide by sample_num).
-#     'squarerootn': sum each sample, but divide by sqrt(sample_num).
-@config_layer('average')
-class AverageLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 average_strategy='average',
-                 trans_type='non-seq',
-                 bias=False,
-                 stride=-1,
-                 **xargs):
-        super(AverageLayer, self).__init__(
-            name, 'average', 0, inputs=inputs, **xargs)
-        self.config.average_strategy = average_strategy
-        if trans_type == 'seq':
-            config_assert(stride == -1, 'subseq does not support stride window')
-        self.config.trans_type = trans_type
-        self.config.seq_pool_stride = stride
-        config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
-        self.create_bias_parameter(bias, self.config.size)
-
-
-@config_layer('tensor')
-class TensorLayer(LayerBase):
-    def __init__(self, name, size, inputs, bias=True, **xargs):
-        super(TensorLayer, self).__init__(
-            name, 'tensor', size, inputs=inputs, **xargs)
-        config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs')
-        config_assert(size > 0, 'size must be positive')
-        config_assert(inputs[1].parameter_name is None,
-                      'second parameter should be None.')
-        input_layer0 = self.get_input_layer(0)
-        input_layer1 = self.get_input_layer(1)
-        psize = size * input_layer0.size * input_layer1.size
-        dims = [input_layer0.size, input_layer1.size, size]
-        self.create_input_parameter(0, psize, dims)
-        self.create_bias_parameter(bias, size)
-
-
-@config_layer('mixed')
-class MixedLayer(LayerBase):
-    def __init__(self, name, inputs, size=0, bias=True, **xargs):
-        config_assert(inputs, 'inputs cannot be empty')
-        super(MixedLayer, self).__init__(
-            name, 'mixed', size, inputs=inputs, **xargs)
-        operator_input_index = []
-        for operator in self.operators:
-            operator_conf = operator.operator_conf
-            for i in xrange(1, len(operator.input_layer_names)):
-                input_index = len(self.config.inputs)
-                operator_conf.input_indices.append(input_index)
-                input_config = Input(operator.input_layer_names[i])
-                self.inputs.append(input_config)
-                layer_input = self.config.inputs.add()
-                layer_input.input_layer_name = input_config.input_layer_name
-            for input_index in operator_conf.input_indices:
-                input_layer = self.get_input_layer(input_index)
-                operator_conf.input_sizes.append(input_layer.size)
-                operator_input_index.append(input_index)
-            if self.config.size == 0:
-                size = operator.calc_output_size(operator_conf.input_sizes)
-                if size != 0:
-                    self.set_layer_size(size)
-            else:
-                sz = operator.calc_output_size(operator_conf.input_sizes)
-                if sz != 0:
-                    config_assert(
-                        sz == self.config.size,
-                        "different inputs have different size: %s vs.
%s" % - (sz, self.config.size)) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - if input_index not in operator_input_index: - config_assert( - isinstance(input, Projection), - "input should be projection or operation") - if self.config.size == 0 and isinstance(input, Projection): - size = input.calc_output_size(input_layer) - if size != 0: - self.set_layer_size(size) - elif isinstance(input, Projection): - sz = input.calc_output_size(input_layer) - if sz != 0: - config_assert( - sz == self.config.size, - "different inputs have different size: %s vs. %s" % - (sz, self.config.size)) - config_assert(size != 0, "size is not set") - - for input_index in xrange(len(self.inputs)): - input = self.inputs[input_index] - if isinstance(input, Projection): - input_layer = self.get_input_layer(input_index) - input.proj_conf.input_size = input_layer.size - input.proj_conf.output_size = size - - input_config = self.config.inputs[input_index] - input_config.proj_conf.CopyFrom(input.proj_conf) - input_config.proj_conf.name = gen_parameter_name(name, - input_index) - psize = input.calc_parameter_size(input_layer.size, size) - dims = input.calc_parameter_dims(input_layer.size, size) - self.create_input_parameter(input_index, psize, dims) - - for operator in self.operators: - operator_conf = operator.operator_conf - operator_conf.output_size = self.config.size - operator.check_dims() - record_operator_conf = self.config.operator_confs.add() - record_operator_conf.CopyFrom(operator_conf) - - psize = self.config.size - if isinstance(self.inputs[0], ConvProjection): - self.config.shared_biases = True - psize = 0 - for input in self.inputs: - psize += input.calc_bias_size() - - if bias: - self.config.bias_size = psize - self.create_bias_parameter(bias, psize) - - -# like MixedLayer, but no bias parameter -@config_func -def ExpressionLayer(name, inputs, **xargs): - MixedLayer(name, inputs, bias=False, **xargs) - - -@config_layer('concat') -class ConcatenateLayer(LayerBase): - layer_type = 'concat' - - def __init__(self, name, inputs, bias=False, **xargs): - config_assert(inputs, 'inputs cannot be empty') - config_assert(not bias, 'ConcatenateLayer cannot support bias.') - use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) - if self.layer_type == "mkldnn_concat": - config_assert(use_mkldnn, "mkldnn_concat only support MKLDNN") - self.layer_type = 'mkldnn_concat' if use_mkldnn else 'concat' - super(ConcatenateLayer, self).__init__( - name, self.layer_type, 0, inputs=inputs, **xargs) - size = 0 - for input_index in xrange(len(self.inputs)): - assert self.get_input_layer(0).height == self.get_input_layer( - input_index).height - assert self.get_input_layer(0).width == self.get_input_layer( - input_index).width - assert self.get_input_layer(0).depth == self.get_input_layer( - input_index).depth - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - if self.config.size == 0: - size += input_layer.size - - self.set_layer_height_width(self.get_input_layer(0).height, \ - self.get_input_layer(0).width) - self.set_layer_depth(self.get_input_layer(0).depth) - self.set_layer_size(size) - - -@config_layer('mkldnn_concat') -class MKLDNNConcatLayer(ConcatenateLayer): - layer_type = 'mkldnn_concat' - - -# like concat layer, but each input layer was processed by a Projection. 
-@config_layer('concat2') -class ConcatenateLayer2(LayerBase): - def __init__(self, name, inputs, bias=False, **xargs): - config_assert(inputs, 'inputs cannot be empty') - super(ConcatenateLayer2, self).__init__( - name, 'concat2', 0, inputs=inputs, **xargs) - - if isinstance(self.inputs[0], ConvProjection): - for input_index in xrange(len(self.inputs) - 1): - input = self.inputs[input_index + 1] - config_assert( - isinstance(input, ConvProjection), - "The first input of ConcatenateLayer2 is ConvProjection, " - "the other inputs should also be ConvProjection.") - - size = 0 - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - output_size = input.calc_output_size(input_layer) - config_assert(output_size != 0, "proj output size is not set") - size += output_size - - self.set_layer_size(size) - - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - input = self.inputs[input_index] - input.proj_conf.input_size = input_layer.size - input.proj_conf.output_size = input.calc_output_size(input_layer) - - input_config = self.config.inputs[input_index] - input_config.proj_conf.CopyFrom(input.proj_conf) - input_config.proj_conf.name = gen_parameter_name(name, input_index) - psize = input.calc_parameter_size(input.proj_conf.input_size, - input.proj_conf.output_size) - dims = input.calc_parameter_dims(input.proj_conf.input_size, - input.proj_conf.output_size) - self.create_input_parameter(input_index, psize, dims) - - psize = self.config.size - if isinstance(self.inputs[0], ConvProjection): - self.config.shared_biases = True - psize = 0 - for input in self.inputs: - psize += input.calc_bias_size() - - if bias: - self.config.bias_size = psize - self.create_bias_parameter(bias, psize) - - -@config_layer('recurrent') -class RecurrentLayer(LayerBase): - layer_type = 'recurrent' - - def __init__(self, name, inputs, reversed=False, bias=True, **xargs): - use_mkl_packed = bool( - int(g_command_config_args.get("use_mkl_packed", 0))) - self.layer_type = 'mkl_packed_recurrent' if use_mkl_packed else 'recurrent' - super(RecurrentLayer, self).__init__(name, self.layer_type, 0, inputs, - **xargs) - config_assert(len(self.inputs) == 1, 'RecurrentLayer must have 1 input') - input_layer = self.get_input_layer(0) - size = input_layer.size - self.set_layer_size(size) - self.config.reversed = reversed - dims = [size, size] - self.create_input_parameter(0, size * size, dims) - self.create_bias_parameter(bias, self.config.size) - - -@config_layer('lstmemory') -class LstmLayer(LayerBase): - def __init__(self, - name, - inputs, - reversed=False, - active_gate_type="sigmoid", - active_state_type="sigmoid", - bias=True, - **xargs): - super(LstmLayer, self).__init__(name, 'lstmemory', 0, inputs, **xargs) - config_assert(len(self.inputs) == 1, 'LstmLayer must have 1 input') - input_layer = self.get_input_layer(0) - #check input_layer.size is divided by 4 - config_assert(input_layer.size % 4 == 0, "size % 4 should be 0!") - size = input_layer.size / 4 - self.set_layer_size(size) - self.config.reversed = reversed - self.config.active_gate_type = active_gate_type - self.config.active_state_type = active_state_type - self.create_input_parameter(0, size * size * 4, [size, size, 4]) - #bias includes 3 kinds of peephole, 4 + 3 = 7 - self.create_bias_parameter(bias, size * 7) - - -@config_layer('lstm_step') -class LstmStepLayer(LayerBase): - def __init__(self, - name, - size, - inputs, - active_gate_type="sigmoid", 
-                 active_state_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(LstmStepLayer, self).__init__(name, 'lstm_step', size, inputs,
-                                            **xargs)
-        config_assert(len(inputs) == 2, 'LstmStepLayer must have 2 inputs')
-        input_layer0 = self.get_input_layer(0)
-        input_layer1 = self.get_input_layer(1)
-        config_assert(input_layer0.size == 4 * size,
-                      'input_layer0.size != 4 * layer.size')
-        config_assert(input_layer1.size == size,
-                      'input_layer1.size != layer.size')
-        self.config.active_gate_type = active_gate_type
-        self.config.active_state_type = active_state_type
-        self.create_bias_parameter(bias, size * 3)
-
-
-# get the specific output from the input layer.
-@config_layer('get_output')
-class GetOutputLayer(LayerBase):
-    def __init__(self, name, size, inputs):
-        super(GetOutputLayer, self).__init__(name, 'get_output', size, inputs)
-        config_assert(
-            len(self.inputs) == 1, 'GetOutputLayer must have 1 input')
-        inputs = self.inputs[0]
-        config_assert(inputs.input_layer_argument,
-                      'input_layer_argument cannot be empty')
-
-
-@config_layer('mdlstmemory')
-class MDLstmLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 directions=True,
-                 active_gate_type="sigmoid",
-                 active_state_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(MDLstmLayer, self).__init__(name, 'mdlstmemory', 0, inputs,
-                                          **xargs)
-        config_assert(len(self.inputs) == 1, 'MDLstmLayer must have 1 input')
-        input_layer = self.get_input_layer(0)
-        dim_num = len(directions)
-        # check that input_layer.size is divisible by (3 + dim_num)
-        config_assert(input_layer.size % (3 + dim_num) == 0,
-                      "size % (3 + dim_num) should be 0!")
-        size = input_layer.size / (3 + dim_num)
-        self.set_layer_size(size)
-        self.config.active_gate_type = active_gate_type
-        self.config.active_state_type = active_state_type
-        for i in xrange(len(directions)):
-            self.config.directions.append(int(directions[i]))
-        self.create_input_parameter(0, size * size * (3 + dim_num),
-                                    [size, size, 3 + dim_num])
-        # bias includes 3 kinds of peephole: 3 + dim_num + 2 + dim_num
-        self.create_bias_parameter(bias, size * (5 + 2 * dim_num))
-
-
-@config_layer('gated_recurrent')
-class GatedRecurrentLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 inputs,
-                 reversed=False,
-                 active_gate_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(GatedRecurrentLayer, self).__init__(name, 'gated_recurrent', 0,
-                                                  inputs, **xargs)
-        config_assert(
-            len(self.inputs) == 1, 'GatedRecurrentLayer must have 1 input')
-        input_layer = self.get_input_layer(0)
-        # check that input_layer.size is divisible by 3
-        config_assert(input_layer.size % 3 == 0, "size % 3 should be 0!")
-        size = input_layer.size / 3
-        self.set_layer_size(size)
-        self.config.reversed = reversed
-        self.config.active_gate_type = active_gate_type
-        self.create_input_parameter(0, size * size * 3, [size, size * 3])
-        self.create_bias_parameter(bias, size * 3)
-
-
-@config_layer('gru_step')
-class GruStepLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 size,
-                 inputs,
-                 active_gate_type="sigmoid",
-                 bias=True,
-                 **xargs):
-        super(GruStepLayer, self).__init__(name, 'gru_step', size, inputs,
-                                           **xargs)
-        config_assert(len(self.inputs) == 2, 'GruStepLayer must have 2 inputs')
-        input_layer0 = self.get_input_layer(0)
-        input_layer1 = self.get_input_layer(1)
-        config_assert(input_layer0.size == 3 * size,
-                      'input_layer0.size != 3 * layer.size')
-        config_assert(input_layer1.size == size,
-                      'input_layer1.size != layer.size')
-        self.config.active_gate_type = active_gate_type
-        self.create_input_parameter(0, size * size * 3, [size, size * 3])
-        self.create_bias_parameter(bias, size * 3)
-
-
-'''
-A layer for calculating the cost of a sequential conditional random field model.
-Example: CRFLayer(name="crf_cost", size=label_num,
-                  inputs=["output", "label", "weight"])
-where "weight" is optional; one weight per sequence.
-@param coeff: weight of the layer
-'''
-
-
-@config_layer('crf')
-class CRFLayer(LayerBase):
-    def __init__(self, name, size, inputs, coeff=1.0, device=None):
-        super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device)
-        config_assert(2 <= len(self.inputs) <= 3,
-                      'CRFLayer must have 2 or 3 inputs')
-        self.create_input_parameter(0, size * (size + 2), [size + 2, size])
-        self.config.coeff = coeff
-
-
-'''
-A layer for calculating the decoding sequence of a sequential conditional
-random field model.
-The decoding sequence is stored in output_.ids.
-If a second input is provided, it is treated as the ground-truth label, and
-this layer will also calculate error: output_.value[i] is 1 for an incorrect
-decoding and 0 for a correct one.
-'''
-
-
-@config_layer('crf_decoding')
-class CRFDecodingLayer(LayerBase):
-    def __init__(self, name, size, inputs, device=None):
-        super(CRFDecodingLayer, self).__init__(
-            name, 'crf_decoding', size, inputs, device=device)
-        config_assert(
-            len(self.inputs) <= 2,
-            'CRFDecodingLayer cannot have more than 2 inputs')
-        self.create_input_parameter(0, size * (size + 2), [size + 2, size])
-
-
-@config_layer('ctc')
-class CTCLayer(LayerBase):
-    def __init__(self, name, size, inputs, norm_by_times=False, device=None):
-        super(CTCLayer, self).__init__(name, 'ctc', size, inputs, device=device)
-        self.config.norm_by_times = norm_by_times
-        config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs')
-
-
-@config_layer('kmax_seq_score')
-class KmaxSeqScoreLayer(LayerBase):
-    def __init__(self, name, inputs, beam_size, **xargs):
-        super(KmaxSeqScoreLayer, self).__init__(
-            name, 'kmax_seq_score', 0, inputs=inputs, **xargs)
-        config_assert(
-            len(self.inputs) == 1, 'KmaxSeqScoreLayer has only one input.')
-        self.config.beam_size = beam_size
-
-
-@config_layer('warp_ctc')
-class WarpCTCLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 size,
-                 inputs,
-                 blank=0,
-                 norm_by_times=False,
-                 device=None):
-        super(WarpCTCLayer, self).__init__(
-            name, 'warp_ctc', size=size, inputs=inputs, device=device)
-        self.config.blank = blank
-        self.config.norm_by_times = norm_by_times
-        config_assert(len(self.inputs) == 2, 'WarpCTCLayer must have 2 inputs')
-        input_layer = self.get_input_layer(0)
-        config_assert(
-            (input_layer.active_type == '' or
-             input_layer.active_type == 'linear'),
-            "Expecting the active_type of the input layer to be linear or null")
-
-
-@config_layer('recurrent_layer_group')
-class RecurrentLayerGroup(LayerBase):
-    def __init__(self, name, device=None):
-        super(RecurrentLayerGroup, self).__init__(
-            name, 'recurrent_layer_group', 0, inputs=[], device=device)
-
-
-@config_layer('switch_order')
-class SwitchOrderLayer(LayerBase):
-    def __init__(self, name, inputs, reshape, **xargs):
-        super(SwitchOrderLayer, self).__init__(
-            name, 'switch_order', 0, inputs=inputs, **xargs)
-        input_layer = self.get_input_layer(0)
-        if reshape is None:
-            self.set_layer_size(input_layer.size)
-        else:
-            # Only touch reshape_conf when reshape is given; accessing it
-            # before the None check above would crash for reshape=None.
-            self.config.reshape_conf.height_axis.extend(reshape['height'])
-            self.config.reshape_conf.width_axis.extend(reshape['width'])
-            in_h = input_layer.height
-            in_w = input_layer.width
-            out_dims = None
-            if input_layer.has_depth():
-                in_d = input_layer.depth
-                in_c = input_layer.size / in_h / in_w / in_d
-                # batch_size, depth, height, width, channel
-                out_dims = [0, in_d, in_h, in_w, in_c]
-            else:
-                in_c = input_layer.size / in_h / in_w
-                # batch_size, height, width, channel
-                out_dims = [0, in_h, in_w, in_c]
-            # Because (reshape['width'][0] > 0) is always true,
-            # out_dims[0] is never used.
-            size = reduce(lambda x, y: x * y, out_dims[reshape['width'][0]:])
-            self.set_layer_size(size)
-
-
-@config_layer('scale_sub_region')
-class ScaleSubRegionLayer(LayerBase):
-    def __init__(self, name, inputs, value, **xargs):
-        super(ScaleSubRegionLayer, self).__init__(
-            name, 'scale_sub_region', 0, inputs=inputs, **xargs)
-        scale_sub_region_conf = self.config.inputs[0].scale_sub_region_conf
-        scale_sub_region_conf.value = value
-
-        # get channel, width and height from the first input layer
-        input_layer = self.get_input_layer(0)
-        image_conf = scale_sub_region_conf.image_conf
-        image_conf.img_size = input_layer.width
-        image_conf.img_size_y = input_layer.height
-        image_conf.channels = input_layer.size / (input_layer.width *
-                                                  input_layer.height)
-        self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
-                           image_conf.channels)
-
-
-@config_layer('factorization_machine')
-class FactorizationMachineLayer(LayerBase):
-    def __init__(self, name, inputs, factor_size, **xargs):
-        super(FactorizationMachineLayer, self).__init__(
-            name, 'factorization_machine', size=1, inputs=inputs, **xargs)
-        config_assert(
-            len(self.inputs) == 1,
-            'factorization machine layer must have one and only one input.')
-        self.config.factor_size = factor_size
-        input_layer = self.get_input_layer(0)
-        psize = input_layer.size * factor_size
-        dims = [input_layer.size, factor_size]
-        self.create_input_parameter(0, psize, dims)
-
-
-# Deprecated, use a new layer-specific class instead
-@config_func
-def Layer(name, type, **xargs):
-    layers = {}
-    layers.update(g_cost_map)
-    layers.update(g_layer_type_map)
-    layer_func = layers.get(type)
-    config_assert(layer_func, "layer type '%s' not supported."
% type) - return layer_func(name, **xargs) - - -@config_func -def ParameterHook(type, **kwargs): - if type == 'pruning': - hook = ParameterUpdaterHookConfig() - hook.type = type - sparsity_ratio = kwargs.get('sparsity_ratio', None) - if sparsity_ratio is not None: - hook.sparsity_ratio = sparsity_ratio - return hook - elif type == 'dpruning': - hook = ParameterUpdaterHookConfig() - hook.type = type - return hook - else: - return None - - -@config_func -def Parameter(name, - size, - device, - dims, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - num_batches_regularization=None, - sparse_remote_update=None, - sparse_update=None, - gradient_clipping_threshold=None, - sparse=None, - format=None, - need_compact=None, - is_static=None, - is_shared=None, - update_hooks=None, - initializer=None): - - config_assert(name not in g_parameter_map, - 'Duplicated parameter name: ' + name) - - para = g_config.model_config.parameters.add() - para.name = name - para.size = size - if device is not None: - para.device = int(device) - para.dims.extend(dims) - - if learning_rate is not None: - para.learning_rate = float(learning_rate) - - momentum = default(momentum, g_default_momentum) - if momentum is not None: - para.momentum = float(momentum) - - config_assert(not momentum or not decay_rate_l1, - "momentum and decay_rate_l1 cannot both be non-zero") - - decay_rate = default(decay_rate, g_default_decay_rate) - if decay_rate is not None: - para.decay_rate = decay_rate - - if decay_rate_l1 is not None: - para.decay_rate_l1 = decay_rate_l1 - para.initial_std = default(initial_std, g_default_initial_std) - para.initial_mean = default(initial_mean, g_default_initial_mean) - - num_batches_regularization = default(num_batches_regularization, - g_default_num_batches_regularization) - if num_batches_regularization is not None: - para.num_batches_regularization = int(num_batches_regularization) - - if sparse_remote_update is not None: - para.sparse_remote_update = sparse_remote_update - if sparse_remote_update: - g_config.opt_config.use_sparse_remote_updater = True - if sparse_update is not None: - para.sparse_update = sparse_update - gradient_clipping_threshold = default(gradient_clipping_threshold, - g_default_gradient_clipping_threshold) - if gradient_clipping_threshold is not None: - para.gradient_clipping_threshold = gradient_clipping_threshold - para.initial_strategy = default(initial_strategy, - g_default_initial_strategy) - para.initial_smart = default(initial_smart, g_default_initial_smart) - if para.initial_smart: - para.initial_mean = 0. - if len(para.dims) != 0: - para.initial_std = 1. / math.sqrt(para.dims[0]) - else: - print( - "Use initial_smart, but dims not set. Initial_smart may not be used in this layer" - ) - traceback.print_exc() - para.initial_std = 1. 
/ math.sqrt(para.size) - if g_default_compact_func is not None: - sparse, format, need_compact = g_default_compact_func(para.name) - - if sparse is not None: - para.is_sparse = sparse - if format is not None: - para.format = format - if need_compact is not None: - para.need_compact = need_compact - if is_static is not None: - para.is_static = is_static - config_assert(not para.sparse_remote_update or not para.is_static, - "sparse_remote_update and is_static cannot both be true") - if is_shared is not None: - para.is_shared = is_shared - - update_hooks = default(update_hooks, g_default_update_hooks) - - if update_hooks is not None: - if hasattr(update_hooks, '__call__'): - update_hooks = update_hooks() - - if isinstance(update_hooks, list): - for hook in update_hooks: - para.update_hooks.extend([hook]) - else: - para.update_hooks.extend([update_hooks]) - - g_parameter_map[name] = para - if initializer is not None: - config_assert( - callable(initializer), - "parameter initializer should be a callable object") - g_parameter_initializer_map[name] = initializer - - -@config_func -def default_initial_std(val): - global g_default_initial_std - g_default_initial_std = val - - -@config_func -def default_initial_mean(val): - global g_default_initial_mean - g_default_initial_mean = val - - -@config_func -def default_initial_strategy(val): - global g_default_initial_strategy - g_default_initial_strategy = val - - -@config_func -def default_initial_smart(val): - global g_default_initial_smart - g_default_initial_smart = val - - -@config_func -def default_momentum(val): - global g_default_momentum - g_default_momentum = val - - -@config_func -def default_decay_rate(val): - global g_default_decay_rate - g_default_decay_rate = val - - -@config_func -def default_num_batches_regularization(val): - global g_default_num_batches_regularization - g_default_num_batches_regularization = val - - -@config_func -def default_gradient_clipping_threshold(val): - global g_default_gradient_clipping_threshold - g_default_gradient_clipping_threshold = val - - -@config_func -def default_device(val): - global g_default_device - g_default_device = val - - -@config_func -def default_update_hooks(val): - global g_default_update_hooks - g_default_update_hooks = val - - -@config_func -def default_compact_func(val): - global g_default_compact_func - g_default_compact_func = val - - -def make_importer(config_dir, config_args): - def Import(config_file, local_args={}): - if not config_file.startswith('/'): - config_file = config_dir + '/' + config_file - g_config.config_files.append(config_file) - execfile(config_file, - make_config_environment(config_file, config_args), local_args) - - return Import - - -DEFAULT_SETTING = dict( - batch_size=None, - mini_batch_size=None, - algorithm='async_sgd', - async_lagged_grad_discard_ratio=1.5, - learning_method='momentum', - gradient_clipping_threshold=None, - num_batches_per_send_parameter=None, - num_batches_per_get_parameter=None, - center_parameter_update_method=None, - learning_rate=1., - learning_rate_decay_a=0., - learning_rate_decay_b=0., - learning_rate_schedule='poly', - learning_rate_args='', - l1weight=0.1, - l2weight=0., - l2weight_zero_iter=0, - c1=0.0001, - backoff=0.5, - owlqn_steps=10, - max_backoff=5, - average_window=0, - do_average_in_cpu=False, - max_average_window=None, - ada_epsilon=1e-6, - ada_rou=0.95, - delta_add_rate=1.0, - shrink_parameter_value=0, - adam_beta1=0.9, - adam_beta2=0.999, - adam_epsilon=1e-8, ) - -settings = copy.deepcopy(DEFAULT_SETTING) - 
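A minimal standalone sketch of the settings-layering pattern above (simplified; the dict contents are illustrative): DEFAULT_SETTING is deep-copied into a mutable settings dict, and the Settings() function defined just below overrides individual keys while rejecting unknown ones.

    import copy

    DEFAULT_SETTING = dict(learning_rate=1., learning_method='momentum')
    settings = copy.deepcopy(DEFAULT_SETTING)

    def Settings(**args):
        # Override only known keys; unknown keys are an error.
        for k, v in args.items():
            if k not in settings:
                raise ValueError('Unknown setting: %s' % k)
            settings[k] = v

    Settings(learning_rate=0.01)
    assert settings['learning_rate'] == 0.01
    assert DEFAULT_SETTING['learning_rate'] == 1.  # defaults stay untouched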
-settings_deprecated = dict(usage_ratio=1., )
-
-trainer_settings = dict(
-    save_dir="./output/model",
-    init_model_path=None,
-    start_pass=0, )
-
-
-@config_func
-def Settings(**args):
-    for k, v in args.iteritems():
-        if k == "usage_ratio":
-            logger.warning(
-                "Deprecated: define usage_ratio in DataConfig instead")
-            if g_config.HasField("data_config"):
-                g_config.data_config.__setattr__(k, v)
-            settings_deprecated[k] = v
-            continue
-        elif k in settings:
-            settings[k] = v
-        elif k in trainer_settings:
-            trainer_settings[k] = v
-        else:
-            logger.fatal('Unknown setting: %s' % k)
-
-
-@config_func
-def cluster_config(**args):
-    pass
-
-
-@config_func
-def EnableSubmodelSuffix(flag=True):
-    """
-    If enabled, the layer and evaluator names in a submodel will automatically
-    be appended with @submodel_name.
-    """
-    global g_add_submodel_suffix
-    g_add_submodel_suffix = flag
-
-
-def make_config_environment(config_file, config_args):
-    def make_setter(k):
-        def setter(v):
-            logger.fatal("Obsolete: use Settings(%s=%s, ...) instead" % (k, v))
-
-        return setter
-
-    funcs = {}
-    funcs.update(g_config_funcs)
-
-    for k in settings.iterkeys():
-        funcs[k] = make_setter(k)
-    for k in settings_deprecated.iterkeys():
-        funcs[k] = make_setter(k)
-    config_dir = os.path.dirname(config_file)
-    if not config_dir:
-        config_dir = '.'
-
-    funcs.update(
-        Import=make_importer(config_dir, config_args),
-        get_config_arg=make_get_config_arg(config_args), )
-
-    funcs.update(g_extended_config_funcs)
-
-    return funcs
-
-
-def make_get_config_arg(config_args):
-    def get_config_arg(name, type, default=None):
-        if type == bool:
-            s = config_args.get(name)
-            if not s:
-                return default
-            if s == 'True' or s == '1' or s == 'true':
-                return True
-            if s == 'False' or s == '0' or s == 'false':
-                return False
-            raise ValueError('Value of config_arg %s is not boolean' % name)
-        else:
-            return type(config_args.get(name, default))
-
-    return get_config_arg
-
-
-def importlib(name):
-    __import__(name)
-    return sys.modules[name]
-
-
-def find_caller():
-    stack = traceback.extract_stack()
-    for s in stack[-4::-1]:
-        if not s[0].endswith('config_parser.py'):
-            return s[0], s[1], s[2]
-    return "(unknown file)", 0, "(unknown function)"
-
-
-def my_fatal(s):
-    logger.critical(s)
-    raise Exception()
-
-
-_parse_config_hooks = set()
-
-
-def register_parse_config_hook(f):
-    """
-    Register a hook function for parse_config. parse_config will invoke the hook
-    at the beginning of parsing. This makes it possible to reset global state
-    before constructing the model.
-    """
-    _parse_config_hooks.add(f)
-
-
-def update_g_config():
-    '''
-    Update g_config after executing the config file or config functions.
- ''' - for k, v in settings.iteritems(): - if v is None: - continue - g_config.opt_config.__setattr__(k, v) - - for k, v in trainer_settings.iteritems(): - if v is None: - continue - g_config.__setattr__(k, v) - - for name in g_config.model_config.input_layer_names: - assert name in g_layer_map, \ - 'input name "%s" does not correspond to a layer name' % name - assert (g_layer_map[name].type == "data" or g_layer_map[name].type == "data_trim"), \ - 'The type of input layer "%s" is not "data"' % name - for name in g_config.model_config.output_layer_names: - assert name in g_layer_map, \ - 'input name "%s" does not correspond to a layer name' % name - return g_config - - -def begin_parse(): - init_config_environment() - for hook in _parse_config_hooks: - hook() - - logger.findCaller = find_caller - logger.fatal = my_fatal - - g_config.model_config.type = "nn" - - global g_current_submodel, g_root_submodel - g_root_submodel = g_config.model_config.sub_models.add() - g_root_submodel.name = 'root' - g_root_submodel.is_recurrent_layer_group = False - g_current_submodel = g_root_submodel - - -def parse_config(trainer_config, config_arg_str): - ''' - @param config_arg_str: a string of the form var1=val1,var2=val2. It will be - passed to config script as a dictionary CONFIG_ARGS - ''' - - begin_parse() - config_args = {} - - if config_arg_str: - config_args = dict([f.split('=') for f in config_arg_str.split(',')]) - - global g_command_config_args - g_command_config_args.update(config_args) - - extension_module_name = config_args.get('extension_module_name') - if extension_module_name: - global g_extended_config_funcs - extension_module = importlib(extension_module_name) - g_extended_config_funcs = extension_module.get_config_funcs(g_config) - - if hasattr(trainer_config, '__call__'): - trainer_config.func_globals.update( - make_config_environment("", config_args)) - trainer_config() - else: - execfile(trainer_config, - make_config_environment(trainer_config, config_args)) - - return update_g_config() - - -def parse_config_and_serialize(trainer_config, config_arg_str): - try: - config = parse_config(trainer_config, config_arg_str) - #logger.info(config) - return config.SerializeToString() - except: - traceback.print_exc() - raise - - -if __name__ == '__main__': - try: - config = parse_config(sys.argv[1], '') - config.SerializeToString() - __real_print__(str(config)) - except: - traceback.print_exc() - raise diff --git a/python/paddle/trainer/config_parser_extension.py b/python/paddle/trainer/config_parser_extension.py deleted file mode 100644 index b9e0f3eb13..0000000000 --- a/python/paddle/trainer/config_parser_extension.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
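The file that follows, config_parser_extension.py, implements the extension hook that parse_config wires in above via config_args['extension_module_name']: the named module is imported and its get_config_funcs() return value is merged into the config environment. A hedged sketch of the contract a custom extension module would satisfy (the module and function names here are invented):

    # my_extension.py -- a hypothetical extension module.
    g_config = None

    def MyData(files=None, feat_dim=None):
        # Build and return some data config; the body is purely illustrative.
        return dict(type='my_data', files=files, feat_dim=feat_dim)

    def get_config_funcs(trainer_config):
        # Required entry point: parse_config merges the returned dict into
        # the config environment (g_extended_config_funcs above).
        global g_config
        g_config = trainer_config
        return dict(MyData=MyData)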
- -from paddle.proto.DataConfig_pb2 import DataConfig - -g_config = None - - -def SimpleData(files=None, - feat_dim=None, - context_len=None, - buffer_capacity=None): - - data_config = DataConfig() - data_config.type = 'simple' - data_config.files = files - data_config.feat_dim = feat_dim - if context_len is not None: - data_config.context_len = context_len - if buffer_capacity: - data_config.buffer_capacity = buffer_capacity - return data_config - - -def get_config_funcs(trainer_config): - global g_config - g_config = trainer_config - return dict(SimpleData=SimpleData) diff --git a/python/paddle/trainer/recurrent_units.py b/python/paddle/trainer/recurrent_units.py deleted file mode 100644 index ef92107a10..0000000000 --- a/python/paddle/trainer/recurrent_units.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# recurrent_units.py -# Version 2.0 -# -# Some recurrent units can be used in recurrent layer group, -# to use these units, import this module in your config_file: -# import trainer.recurrent_units -# -# The modules in this file are DEPRECATED. -# If you would like to use lstm/gru -# please use the functions defined in paddle.trainer_config_helpers. 
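Per the header comments above, these units were meant to be pulled into a v1 trainer config via an import. A hedged usage sketch (the layer name 'embedding' and the size are illustrative; FullMatrixProjection and the surrounding config machinery are supplied by the config environment):

    from paddle.trainer.recurrent_units import LstmRecurrentLayerGroup

    # 'embedding' is an assumed upstream layer name.
    LstmRecurrentLayerGroup(
        name='lstm_group',
        size=128,
        active_type='tanh',
        state_active_type='sigmoid',
        gate_active_type='sigmoid',
        inputs=[FullMatrixProjection('embedding')],
        seq_reversed=False)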
- -from paddle.trainer.config_parser import * - - -# long short term memory, can be used in recurrent machine -# *inputs* must be a list of Projections, for example: -# inputs = [FullMatrixProjection("input_layer_name")], -# *para_prefix* defines parameter names, if the *para_prefix* of -# two LstmRecurrentUnit is same, they share same parameters -# *out_memory* can be defined outside if it's used outside -def LstmRecurrentUnit(name, - size, - active_type, - state_active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - state_memory = Memory(name=name + "_" + "state", size=size) - - Layer( - name=name + "_" + "input_recurrent", - type="mixed", - size=size * 4, #(input_s, input_gate, forget_gate, output_gate) - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"), - inputs=inputs + [ - FullMatrixProjection( - out_memory, parameter_name=para_prefix + "_input_recurrent.w"), - ], ) - LstmStepLayer( - name=name, - size=size, - bias=Bias(parameter_name=para_prefix + "_check.b"), - inputs=[name + "_" + "input_recurrent", state_memory], - active_type=active_type, - active_gate_type=gate_active_type, - active_state_type=state_active_type, ) - GetOutputLayer( - name=name + "_" + "state", - size=size, - inputs=Input( - name, input_layer_argument="state"), ) - - -def LstmRecurrentUnitNaive(name, - size, - active_type, - state_active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - state_memory = Memory(name=name + "_" + "state", size=size) - - Layer( - name=name + "_" + "input_recurrent", - type="mixed", - size=size * 4, #(input_s, input_gate, forget_gate, output_gate) - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"), - inputs=inputs + [ - FullMatrixProjection( - out_memory, parameter_name=para_prefix + "_input_recurrent.w"), - ], ) - ExpressionLayer( - name=name + "_" + "input_s", - size=size, - active_type=active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=0) - ], ) - ExpressionLayer( - name=name + "_" + "input_gate", - active_type=gate_active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=size), DotMulProjection( - state_memory, parameter_name=para_prefix + "_input_check.w") - ], ) - ExpressionLayer( - name=name + "_" + "forget_gate", - active_type=gate_active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=size * 2), - DotMulProjection( - state_memory, parameter_name=para_prefix + "_forget_check.w") - ], ) - ExpressionLayer( - name=name + "_" + "state", - inputs=[ - DotMulOperator([name + "_" + "input_s", name + "_" + "input_gate"]), - DotMulOperator([state_memory, name + "_" + "forget_gate"]), - ], ) - ExpressionLayer( - name=name + "_" + "output_gate", - active_type=gate_active_type, - inputs=[ - IdentityOffsetProjection( - name + "_" + "input_recurrent", offset=size * 3), - DotMulProjection( - name + "_" + "state", - parameter_name=para_prefix + "_output_check.w") - ], ) - ExpressionLayer( - name=name + "_" + "state_atv", - active_type=state_active_type, - 
inputs=IdentityProjection(name + "_" + "state"), ) - ExpressionLayer( - name=name, - inputs=DotMulOperator( - [name + "_" + "state_atv", name + "_" + "output_gate"]), ) - - -# like LstmRecurrentUnit, but it's a layer group. -# it is equivalent to LstmLayer -def LstmRecurrentLayerGroup(name, - size, - active_type, - state_active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - seq_reversed=False): - - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 4, - active_type="", - bias=False, - inputs=inputs, ) - - RecurrentLayerGroupBegin( - name + "_layer_group", - in_links=[input_layer_name], - out_links=[name], - seq_reversed=seq_reversed) - - LstmRecurrentUnit( - name=name, - size=size, - active_type=active_type, - state_active_type=state_active_type, - gate_active_type=gate_active_type, - inputs=[IdentityProjection(input_layer_name)], - para_prefix=para_prefix, - error_clipping_threshold=error_clipping_threshold, ) - - RecurrentLayerGroupEnd(name + "_layer_group") - - -# gated recurrent unit, can be used in recurrent machine -# *inputs* should be a list of Projections, for example: -# inputs = [FullMatrixProjection("input_layer_name")], -# *para_prefix* defines parameter names, if the *para_prefix* of -# two GatedRecurrentUnit is same, they share same parameters -# *out_memory* can be defined outside if it's used outside - - -def GatedRecurrentUnit(name, - size, - active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup - input_layer_name = inputs - else: - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 3, - active_type="", - bias=False, - inputs=inputs, ) - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - GruStepLayer( - name=name, - size=size, - bias=Bias(parameter_name=para_prefix + "_gate.b"), - inputs=[ - input_layer_name, Input( - out_memory, parameter_name=para_prefix + "_gate.w") - ], - active_type=active_type, - active_gate_type=gate_active_type, ) - - -def GatedRecurrentUnitNaive(name, - size, - active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - out_memory=None): - - if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup - input_layer_name = inputs - else: - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 3, - active_type="", - bias=False, - inputs=inputs, ) - - if para_prefix is None: - para_prefix = name - if out_memory is None: - out_memory = Memory(name=name, size=size) - - Layer( - name=name + "_" + "update_gate", - type="mixed", - size=size, - active_type=gate_active_type, - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_update_gate.b"), - inputs=[ - IdentityOffsetProjection( - input_layer_name, offset=0), FullMatrixProjection( - out_memory, parameter_name=para_prefix + "_update_gate.w") - ], ) - Layer( - name=name + "_" + "reset_gate", - type="mixed", - size=size, - active_type=gate_active_type, - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_reset_gate.b"), - inputs=[ - IdentityOffsetProjection( - input_layer_name, offset=size), FullMatrixProjection( - out_memory, 
parameter_name=para_prefix + "_reset_gate.w") - ], ) - ExpressionLayer( - name=name + "_" + "reset_output", - inputs=DotMulOperator([out_memory, name + "_" + "reset_gate"]), ) - Layer( - name=name + "_" + "output_candidate", - type="mixed", - size=size, - active_type=active_type, - error_clipping_threshold=error_clipping_threshold, - bias=Bias( - initial_std=0, parameter_name=para_prefix + "_output_candidate.b"), - inputs=[ - IdentityOffsetProjection( - input_layer_name, offset=size * 2), FullMatrixProjection( - name + "_" + "reset_output", - parameter_name=para_prefix + "_output_candidate.w") - ], ) - ExpressionLayer( #element-wise interpolation - name=name, - inputs=[ - IdentityProjection(out_memory), - DotMulOperator( - [out_memory, name + "_" + "update_gate"], scale=-1.0), - DotMulOperator( - [name + "_" + "output_candidate", name + "_" + "update_gate"]), - ], ) - - -# like GatedRecurrentUnit, but it's a layer group. -# it is equivalent to GatedRecurrentLayer. -def GatedRecurrentLayerGroup(name, - size, - active_type, - gate_active_type, - inputs, - para_prefix=None, - error_clipping_threshold=0, - seq_reversed=False): - - input_layer_name = name + "_" + "transform_input" - Layer( - name=input_layer_name, - type="mixed", - size=size * 3, - active_type="", - bias=False, - inputs=inputs, ) - - RecurrentLayerGroupBegin( - name + "_layer_group", - in_links=[input_layer_name], - out_links=[name], - seq_reversed=seq_reversed) - - GatedRecurrentUnit( - name=name, - size=size, - active_type=active_type, - gate_active_type=gate_active_type, - inputs=input_layer_name, #transform outside - para_prefix=para_prefix, - error_clipping_threshold=error_clipping_threshold, ) - - RecurrentLayerGroupEnd(name + "_layer_group") diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py deleted file mode 100644 index 13155ebddb..0000000000 --- a/python/paddle/trainer_config_helpers/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from activations import * -from data_sources import * -from poolings import * -from evaluators import * -from layers import * -from networks import * -from optimizers import * -from attrs import * -from config_parser_utils import * -# This will enable operator overload for LayerOutput -import layer_math diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py deleted file mode 100644 index 3683968262..0000000000 --- a/python/paddle/trainer_config_helpers/activations.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-__all__ = [
-    "TanhActivation", "SigmoidActivation", "SoftmaxActivation",
-    "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation',
-    'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation",
-    "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation",
-    "LogActivation", "SqrtActivation", "ReciprocalActivation",
-    "SoftSignActivation"
-]
-
-
-class BaseActivation(object):
-    """
-    A mark for an activation class.
-    Each activation inherits from BaseActivation, which has two parameters.
-
-    :param name: activation name in the paddle config.
-    :type name: basestring
-    :param support_hppl: True if supported by hppl. HPPL is a library used by
-                         paddle internally. Currently, only the lstm layer is
-                         restricted to activations supported by hppl.
-    :type support_hppl: bool
-    """
-
-    def __init__(self, name, support_hppl):
-        self.name = name
-        self.support_hppl = support_hppl
-
-    def __repr__(self):
-        return self.name
-
-
-class TanhActivation(BaseActivation):
-    """
-    Tanh activation.
-
-    .. math::
-
-        f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}}
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, 'tanh', True)
-
-
-class SigmoidActivation(BaseActivation):
-    """
-    Sigmoid activation.
-
-    .. math::
-
-        f(z) = \\frac{1}{1+exp(-z)}
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, 'sigmoid', True)
-
-
-class SoftmaxActivation(BaseActivation):
-    """
-    Softmax activation for a simple input.
-
-    .. math::
-
-        P(y=j|x) = \\frac{e^{x_j}} {\\sum^K_{k=1} e^{x_k} }
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, 'softmax', False)
-
-
-class SequenceSoftmaxActivation(BaseActivation):
-    """
-    Softmax activation for one sequence. The dimension of the input feature
-    must be 1, and the input must be a sequence.
-
-    .. code:: python
-
-        result = softmax(for each_feature_vector[0] in input_feature)
-        for i, each_time_step_output in enumerate(output):
-            each_time_step_output = result[i]
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, 'sequence_softmax', False)
-
-
-class IdentityActivation(BaseActivation):
-    """
-    Identity Activation.
-
-    Does nothing to the output in either the forward or the backward pass.
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, '', False)
-
-
-LinearActivation = IdentityActivation
-
-
-class ReluActivation(BaseActivation):
-    """
-    Relu activation.
-
-    Forward: :math:`y = max(0, z)`
-
-    Derivative:
-
-    .. math::
-
-        1 &\\quad if z > 0 \\\\
-        0 &\\quad \\mathrm{otherwise}
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, 'relu', True)
-
-
-class BReluActivation(BaseActivation):
-    """
-    BRelu Activation.
-
-    Forward: :math:`y = min(24, max(0, z))`
-
-    Derivative:
-
-    .. math::
-
-        1 &\\quad if 0 < z < 24 \\\\
-        0 &\\quad \\mathrm{otherwise}
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, 'brelu', False)
-
-
-class SoftReluActivation(BaseActivation):
-    """
-    SoftRelu Activation.
-    """
-
-    def __init__(self):
-        BaseActivation.__init__(self, 'softrelu', False)
-
-
-class STanhActivation(BaseActivation):
-    """
-    Scaled Tanh Activation.
-
-    ..
math:: - - f(z) = 1.7159 * tanh(2/3*z) - """ - - def __init__(self): - BaseActivation.__init__(self, 'stanh', False) - - -class AbsActivation(BaseActivation): - """ - Abs Activation. - - Forward: :math:`f(z) = abs(z)` - - Derivative: - - .. math:: - - 1 &\\quad if \\quad z > 0 \\\\ - -1 &\\quad if \\quad z < 0 \\\\ - 0 &\\quad if \\quad z = 0 - """ - - def __init__(self): - BaseActivation.__init__(self, 'abs', False) - - -class SquareActivation(BaseActivation): - """ - Square Activation. - - .. math:: - f(z) = z^2. - """ - - def __init__(self): - BaseActivation.__init__(self, 'square', False) - - -class ExpActivation(BaseActivation): - """ - Exponential Activation. - - .. math:: - f(z) = e^z. - """ - - def __init__(self): - BaseActivation.__init__(self, 'exponential', False) - - -class LogActivation(BaseActivation): - """ - Logarithm Activation. - - .. math:: - f(z) = log(z) - """ - - def __init__(self): - BaseActivation.__init__(self, 'log', False) - - -class SqrtActivation(BaseActivation): - """ - Square Root Activation. - - .. math:: - f(z) = sqrt(z) - """ - - def __init__(self): - BaseActivation.__init__(self, 'sqrt', False) - - -class ReciprocalActivation(BaseActivation): - """ - Reciprocal Activation. - - .. math:: - f(z)=\\frac{1}{z} - """ - - def __init__(self): - BaseActivation.__init__(self, 'reciprocal', False) - - -class SoftSignActivation(BaseActivation): - """ - SoftSign Activation. - - .. math:: - f(z)=\\frac{z}{1 + |z|} - """ - - def __init__(self): - BaseActivation.__init__(self, 'softsign', False) diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py deleted file mode 100644 index 4e3beaf639..0000000000 --- a/python/paddle/trainer_config_helpers/attrs.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
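The attrs helpers that follow hinge on a convert-and-round-trip type check: x is compatible with a target type if converting it to that type and back loses no information. A standalone sketch of the trick (simplified signature):

    def convert_and_compare(x, target_type):
        # Round-trip x through target_type; equality means no information lost.
        return type(x)(target_type(x)) == x

    assert convert_and_compare(3.0, int)       # 3.0 -> 3 -> 3.0: lossless
    assert not convert_and_compare(3.5, int)   # 3.5 -> 3 -> 3.0: lossy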
-
-from paddle.trainer.config_parser import *
-__all__ = [
-    'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute',
-    'ExtraLayerAttribute'
-]
-
-
-def convert_and_compare(x, Type):
-    """
-    Convert x to the same type as Type, then convert it back and check
-    whether there is a loss of information.
-    :param x: object to be checked
-    :param Type: target type to check x against
-    """
-    return type(x)(Type(x)) == x
-
-
-def is_compatible_with(x, Type):
-    """
-    Check whether x has a type compatible with Type.
-    :param x: object to be checked
-    :param Type: target type to check x against
-    """
-    if type(x) == Type:
-        return True
-    try:
-        if float == Type or int == Type:
-            # Avoid types that can be converted to float/int but are not very
-            # meaningful and could potentially lead to errors; i.e., str and
-            # bool values should not be used to initialize a float/int variable.
-            if not isinstance(x, str) and not isinstance(x, bool):
-                return convert_and_compare(x, Type)
-        elif bool == Type:
-            # A string should not be used to initialize a bool variable.
-            if not isinstance(x, str):
-                return convert_and_compare(x, Type)
-        else:
-            return False
-    except:
-        return False
-
-
-class HookAttribute(object):
-    """
-    Hook Attribute object. As a member of the ParameterAttribute class, a hook
-    is an auxiliary operation that occurs during the training of a layer with
-    parameters, such as an img_conv or fc layer.
-
-    :param type: Hook type. Currently supported types:
-                 'pruning': the user specifies a sparsity_ratio before training
-                 starts, and the network prunes the parameters based on it,
-                 e.g., hk = HookAttribute('pruning', 0.6), used as
-                 paddle.layer.img_conv(input=img, filter_size=3,
-                 num_channels=3, num_filters=64,
-                 param_attr=ParameterAttribute(update_hooks=hk))
-                 The pruning details can be found at
-                 https://arxiv.org/pdf/1506.02626.pdf
-    :type type: string
-
-    :param sparsity_ratio: Must be specified if the hook type is 'pruning';
-                           it is the fraction of the parameter's elements
-                           to be set to zero.
-    :type sparsity_ratio: float or None
-    """
-
-    def __init__(self, type, sparsity_ratio=None):
-        self.type = type
-        self.sparsity_ratio = sparsity_ratio
-        if self.sparsity_ratio is not None:
-            assert is_compatible_with(
-                self.sparsity_ratio,
-                float), 'sparsity_ratio must be of float type'
-            assert 0 <= self.sparsity_ratio <= 1, 'sparsity_ratio must be a float in [0, 1]'
-
-    def __call__(self):
-        return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio)
-
-
-class ParameterAttribute(object):
-    """
-    Parameter Attributes object. To fine-tune the network training process,
-    the user can set attributes to control training details, such as the
-    l1/l2 rate, the learning rate, and how to initialize the parameter.
-
-    NOTE: IT IS A HIGH LEVEL USER INTERFACE.
-
-    :param is_static: True if this parameter is fixed while training.
-    :type is_static: bool
-
-    :param initial_std: Gauss Random initialization standard deviation.
-                        None if not using Gauss Random parameter initialization.
-    :type initial_std: float or None
-    :param initial_mean: Gauss Random initialization mean.
-                         None if not using Gauss Random parameter initialization.
-    :type initial_mean: float or None
-    :param initial_max: Uniform initialization max value.
-    :type initial_max: float or None
-    :param initial_min: Uniform initialization min value.
- :type initial_min: float or None - :param l1_rate: the l1 regularization factor - :type l1_rate: float or None - :param l2_rate: the l2 regularization factor - :type l2_rate: float or None - :param learning_rate: The parameter learning rate. None means 1. - The learning rate when optimize is LEARNING_RATE = - GLOBAL_LEARNING_RATE * PARAMETER_LEARNING_RATE - * SCHEDULER_FACTOR. - - :type learning_rate: float or None - :param momentum: The parameter momentum. None means use global value. - :type momentum: float or None - :param gradient_clipping_threshold: gradient clipping threshold. If gradient - value larger than some value, will be - clipped. - :type gradient_clipping_threshold: float - :param sparse_update: Enable sparse update for this parameter. It will - enable both local and remote sparse update. - :type sparse_update: bool - :param update_hooks: A HookAttribute object. - :type update_hooks: HookAttribute - :param initializer: If not None, it should be a callable object which accepts - a parameter name and returns numpy array for the initial - value of the parameter - :type initializer: callable object - """ - - def __init__(self, - name=None, - is_static=False, - initial_std=None, - initial_mean=None, - initial_max=None, - initial_min=None, - l1_rate=None, - l2_rate=None, - learning_rate=None, - momentum=None, - gradient_clipping_threshold=None, - sparse_update=False, - update_hooks=None, - initializer=None): - self.attr = {} - - if is_static: - self.attr['is_static'] = True - - if initial_std is None and initial_mean is None and initial_max \ - is None and initial_min is None: - self.attr['initial_smart'] = True - elif is_compatible_with(initial_std, float) or \ - is_compatible_with(initial_mean, float): - if initial_std is not None: - self.attr['initial_std'] = initial_std - if initial_mean is not None: - self.attr['initial_mean'] = initial_mean - self.attr['initial_strategy'] = 0 # Gauss Random - elif is_compatible_with(initial_max, float) and \ - is_compatible_with(initial_min, float): - initial_max = initial_max - initial_min = initial_min - assert initial_min < initial_max - initial_mean = (initial_max + initial_min) / 2 - initial_std = initial_mean - initial_min - self.attr['initial_mean'] = initial_mean - self.attr['initial_std'] = initial_std - self.attr['initial_strategy'] = 1 # Uniform Random - else: - raise RuntimeError("Unexpected branch.") - - if not is_static and is_compatible_with(l1_rate, float): - self.attr['decay_rate_l1'] = l1_rate - - if not is_static and is_compatible_with(l2_rate, float): - self.attr['decay_rate'] = l2_rate - - if not is_static and is_compatible_with(learning_rate, float): - self.attr['learning_rate'] = learning_rate - - if not is_static and is_compatible_with(momentum, float): - self.attr['momentum'] = momentum - - if name is not None: - self.attr['parameter_name'] = name - - if sparse_update: - self.attr['sparse_update'] = True - self.attr['sparse_remote_update'] = True - - if gradient_clipping_threshold is not None and \ - is_compatible_with(gradient_clipping_threshold, float): - self.attr['gradient_clipping_threshold'] = \ - gradient_clipping_threshold - if initializer is not None: - self.attr['initializer'] = initializer - - if update_hooks: - self.attr['update_hooks'] = update_hooks - - def set_default_parameter_name(self, name): - """ - Set default parameter name. If parameter not set, then will use default - parameter name. - - - :param name: default parameter name. 
- :type name: basestring - """ - if 'parameter_name' not in self.attr: - self.attr['parameter_name'] = name - - @staticmethod - def to_bias(bias_attr): - if isinstance(bias_attr, ParameterAttribute): - return Bias(**bias_attr.attr) - else: - return False - - -class ExtraLayerAttribute(object): - """ - High-level layer attribute configuration. You can set all attributes here, - but not every layer supports every attribute. If you set an attribute on a - layer that does not support it, paddle will print an error and dump core. - - :param error_clipping_threshold: Error clipping threshold. - :type error_clipping_threshold: float - :param drop_rate: Dropout rate. Dropout will create a mask on the layer output. - The dropout rate is the zero rate of this mask. For the details - of what dropout is, please refer to the JMLR dropout paper. - :type drop_rate: float - :param device: device ID of the layer. device=-1 means use CPU; device>=0 means - use GPU. For the details of device allocation in parallel_nn, - please refer to the use_case documentation. - :type device: int - """ - - def __init__(self, - error_clipping_threshold=None, - drop_rate=None, - device=None): - self.attr = dict() - if error_clipping_threshold is not None: - error_clipping_threshold = float(error_clipping_threshold) - if error_clipping_threshold < 0: - raise ValueError("Error clipping threshold must be non-negative") - self.attr['error_clipping_threshold'] = error_clipping_threshold - if drop_rate is not None: - drop_rate = float(drop_rate) - if drop_rate < 0: - raise ValueError("Dropout rate must be non-negative") - self.attr["drop_rate"] = drop_rate - - if isinstance(device, int): - self.attr["device"] = device - - def check(self, layer_name): - for key in self.attr: - if not hasattr(self, 'can_%s' % key) or \ - not getattr(self, 'can_%s' % key): - raise NotImplementedError("Layer %s does not support %s" % - (layer_name, key)) - - @staticmethod - def to_kwargs(attr): - if attr is None: - return dict() - else: - return attr.attr - - -HookAttr = HookAttribute -ParamAttr = ParameterAttribute -ExtraAttr = ExtraLayerAttribute diff --git a/python/paddle/trainer_config_helpers/config_parser_utils.py b/python/paddle/trainer_config_helpers/config_parser_utils.py deleted file mode 100644 index ee5bbbfb2d..0000000000 --- a/python/paddle/trainer_config_helpers/config_parser_utils.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -import paddle.trainer.config_parser as config_parser -from paddle.proto.TrainerConfig_pb2 import OptimizationConfig -''' -This file is a wrapper of the formal config_parser. The main idea of this file is to -separate different config logic into different functions, such as network configuration - and optimizer configuration.
-''' - -__all__ = [ - "parse_trainer_config", "parse_network_config", "parse_optimizer_config", - "reset_parser" -] - - -def parse_trainer_config(trainer_conf, config_arg_str): - return config_parser.parse_config(trainer_conf, config_arg_str) - - -def parse_network_config(network_conf, config_arg_str=''): - config = config_parser.parse_config(network_conf, config_arg_str) - return config.model_config - - -def parse_optimizer_config(optimizer_conf, config_arg_str=''): - config_parser.settings = copy.deepcopy(config_parser.DEFAULT_SETTING) - optimizer_conf() - opt_config = OptimizationConfig() - for k, v in config_parser.settings.iteritems(): - if v is None: - continue - opt_config.__setattr__(k, v) - return opt_config - - -def reset_parser(): - config_parser.begin_parse() diff --git a/python/paddle/trainer_config_helpers/data_sources.py b/python/paddle/trainer_config_helpers/data_sources.py deleted file mode 100644 index a2a32d848c..0000000000 --- a/python/paddle/trainer_config_helpers/data_sources.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Data Sources are helpers to define paddle training data or testing data. -""" -from paddle.trainer.config_parser import * -from .utils import deprecated - -try: - import cPickle as pickle -except ImportError: - import six.moves.cPickle as pickle - -__all__ = ['define_py_data_sources2'] - - -def define_py_data_source(file_list, - cls, - module, - obj, - args=None, - async=False, - data_cls=PyData): - """ - Define a python data source. - - For example, the simplest usage in trainer_config.py as follow: - - .. code-block:: python - - define_py_data_source("train.list", TrainData, "data_provider", "process") - - Or. if you want to pass arguments from trainer_config to data_provider.py, then - - .. code-block:: python - - define_py_data_source("train.list", TrainData, "data_provider", "process", - args={"dictionary": dict_name}) - - :param data_cls: - :param file_list: file list name, which contains all data file paths - :type file_list: basestring - :param cls: Train or Test Class. - :type cls: TrainData or TestData - :param module: python module name. - :type module: basestring - :param obj: python object name. May be a function name if using - PyDataProviderWrapper. - :type obj: basestring - :param args: The best practice is using dict to pass arguments into - DataProvider, and use :code:`@init_hook_wrapper` to - receive arguments. - :type args: string or picklable object - :param async: Load Data asynchronously or not. 
- :type async: bool - :return: None - :rtype: None - """ - if isinstance(file_list, list): - file_list_name = 'train.list' - if cls == TestData: - file_list_name = 'test.list' - with open(file_list_name, 'w') as f: - f.writelines(file_list) - file_list = file_list_name - - if not isinstance(args, basestring) and args is not None: - args = pickle.dumps(args, 0) - - cls( - data_cls( - files=file_list, - load_data_module=module, - load_data_object=obj, - load_data_args=args, - async_load_data=async)) - - -def define_py_data_sources(train_list, - test_list, - module, - obj, - args=None, - train_async=False, - data_cls=PyData): - """ - The annotation is almost the same as define_py_data_sources2, except that - it can specify train_async and data_cls. - - :param data_cls: - :param train_list: Train list name. - :type train_list: basestring - :param test_list: Test list name. - :type test_list: basestring - :param module: python module name. If train and test are different, then - pass a tuple or list to this argument. - :type module: basestring or tuple or list - :param obj: python object name. May be a function name if using - PyDataProviderWrapper. If train and test are different, then pass - a tuple or list to this argument. - :type obj: basestring or tuple or list - :param args: The best practice is using dict() to pass arguments into - DataProvider, and use :code:`@init_hook_wrapper` to receive - arguments. If train and test are different, then pass a tuple - or list to this argument. - :type args: string or picklable object or list or tuple. - :param train_async: Whether to load training data asynchronously. - :type train_async: bool - :return: None - :rtype: None - """ - - def __is_splitable__(o): - return (isinstance(o, list) or - isinstance(o, tuple)) and hasattr(o, '__len__') and len(o) == 2 - - assert train_list is not None or test_list is not None - assert module is not None and obj is not None - - test_module = module - train_module = module - if __is_splitable__(module): - train_module, test_module = module - - test_obj = obj - train_obj = obj - if __is_splitable__(obj): - train_obj, test_obj = obj - - if args is None: - args = "" - - train_args = args - test_args = args - if __is_splitable__(args): - train_args, test_args = args - - if train_list is not None: - define_py_data_source(train_list, TrainData, train_module, train_obj, - train_args, train_async, data_cls) - - if test_list is not None: - define_py_data_source(test_list, TestData, test_module, test_obj, - test_args, False, data_cls) - - -def define_py_data_sources2(train_list, test_list, module, obj, args=None): - """ - Define python Train/Test data sources in one method. If train/test use - the same Data Provider configuration, module/obj/args contain one argument; - otherwise they contain a list or tuple of arguments. For example\: - - .. code-block:: python - - define_py_data_sources2(train_list="train.list", - test_list="test.list", - module="data_provider", - # if train/test use different configurations, - # obj=["process_train", "process_test"] - obj="process", - args={"dictionary": dict_name}) - - The related data provider is described in :ref:`api_pydataprovider2_sequential_model`. - - :param train_list: Train list name. - :type train_list: basestring - :param test_list: Test list name. - :type test_list: basestring - :param module: python module name. If train and test are different, then - pass a tuple or list to this argument. - :type module: basestring or tuple or list - :param obj: python object name.
May be a function name if using - PyDataProviderWrapper. If train and test are different, then pass - a tuple or list to this argument. - :type obj: basestring or tuple or list - :param args: The best practice is using dict() to pass arguments into - DataProvider, and use :code:`@init_hook_wrapper` to receive - arguments. If train and test are different, then pass a tuple - or list to this argument. - :type args: string or picklable object or list or tuple. - :return: None - :rtype: None - """ - - def py_data2(files, load_data_module, load_data_object, load_data_args, - **kwargs): - data = create_data_config_proto() - data.type = 'py2' - data.files = files - data.load_data_module = load_data_module - data.load_data_object = load_data_object - data.load_data_args = load_data_args - data.async_load_data = False - return data - - define_py_data_sources( - train_list=train_list, - test_list=test_list, - module=module, - obj=obj, - args=args, - data_cls=py_data2) diff --git a/python/paddle/trainer_config_helpers/default_decorators.py b/python/paddle/trainer_config_helpers/default_decorators.py deleted file mode 100644 index 69d860d9da..0000000000 --- a/python/paddle/trainer_config_helpers/default_decorators.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
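For orientation, the obj argument above names a generator function inside the data provider module. The following is a minimal sketch of what such a module might have looked like against the legacy PyDataProvider2 API; the decorator `provider`, the type helpers `dense_vector`/`integer_value`, the file format, and the dimensions are all assumptions recalled from that legacy API, not part of this patch.

.. code-block:: python

    # data_provider.py -- hypothetical module for define_py_data_sources2(obj="process")
    from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value

    @provider(input_types=[dense_vector(784), integer_value(10)])
    def process(settings, file_name):
        # one sample per line: 784 feature values followed by an integer label
        with open(file_name) as f:
            for line in f:
                values = line.split()
                yield [float(v) for v in values[:-1]], int(values[-1])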
- -import functools -import inspect -from .attrs import ParamAttr -from .activations import TanhActivation -from paddle.trainer.config_parser import * - -__all__ = [ - 'wrap_name_default', 'wrap_param_attr_default', 'wrap_bias_attr_default', - 'wrap_act_default', 'wrap_param_default' -] - - -def __default_not_set_callback__(kwargs, name): - return name not in kwargs or kwargs[name] is None - - -def wrap_param_default(param_names=None, - default_factory=None, - not_set_callback=__default_not_set_callback__): - assert param_names is not None - assert isinstance(param_names, list) or isinstance(param_names, tuple) - for each_param_name in param_names: - assert isinstance(each_param_name, basestring) - - def __impl__(func): - @functools.wraps(func) - def __wrapper__(*args, **kwargs): - if len(args) != 0: - argspec = inspect.getargspec(func) - num_positional = len(argspec.args) - if argspec.defaults: - num_positional -= len(argspec.defaults) - if not argspec.varargs and len(args) > num_positional: - logger.fatal( - "Must use keyword arguments for non-positional args") - for name in param_names: - if not_set_callback(kwargs, name): # Not set - kwargs[name] = default_factory(func) - return func(*args, **kwargs) - - if hasattr(func, 'argspec'): - __wrapper__.argspec = func.argspec - else: - __wrapper__.argspec = inspect.getargspec(func) - return __wrapper__ - - return __impl__ - - -class DefaultNameFactory(object): - def __init__(self, name_prefix): - self.__counter__ = 0 - self.__name_prefix__ = name_prefix - - def __call__(self, func): - if self.__name_prefix__ is None: - self.__name_prefix__ = func.__name__ - tmp = "__%s_%d__" % (self.__name_prefix__, self.__counter__) - self.__check_name__(tmp) - self.__counter__ += 1 - return tmp - - def __check_name__(self, nm): - """ - @TODO(yuyang18): Implement it! - @param nm: - @return: - """ - pass - - def reset(self): - self.__counter__ = 0 - - -_name_factories = [] - - -def reset_hook(): - for factory in _name_factories: - factory.reset() - - -register_parse_config_hook(reset_hook) - - -def wrap_name_default(name_prefix=None, name_param="name"): - """ - Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}". - - .. code:: python - - @wrap_name_default("some_name") - def func(name=None): - print name # name will never be None. If name is not set, - # name will be "some_name_%d" - - :param name_prefix: name prefix. wrapped function's __name__ if None. - :type name_prefix: basestring - :return: a decorator to set default name - :rtype: callable - """ - factory = DefaultNameFactory(name_prefix) - _name_factories.append(factory) - return wrap_param_default([name_param], factory) - - -def wrap_param_attr_default(param_names=None, default_factory=None): - """ - Setting Default Parameter Attributes Decorator. - - :param default_factory: - :param param_names: Parameter Attribute's Names, list of string - :type param_names: list - :return: decorator - """ - if param_names is None: - param_names = ['param_attr'] - if default_factory is None: - default_factory = lambda _: ParamAttr() - - return wrap_param_default(param_names, default_factory) - - -def wrap_bias_attr_default(param_names=None, - default_factory=None, - has_bias=True): - if param_names is None: - param_names = ['bias_attr'] - if default_factory is None: - default_factory = lambda _: ParamAttr(initial_std=0., initial_mean=0.) 
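# Editor's sketch: given the "__%s_%d__" pattern in DefaultNameFactory above,
# a decorated layer function receives deterministic default names. Assuming a
# hypothetical function fc_layer decorated with @wrap_name_default("fc"):
#
#     @wrap_name_default("fc")
#     def fc_layer(input, size, name=None):
#         return name
#
#     fc_layer(None, 10)   # name defaults to '__fc_0__'
#     fc_layer(None, 10)   # name defaults to '__fc_1__'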
- - def __bias_attr_not_set__(kwargs, name): - if has_bias: - return name not in kwargs or kwargs[name] is None or \ - kwargs[name] == True - else: - return name in kwargs and kwargs[name] == True - - return wrap_param_default(param_names, default_factory, - __bias_attr_not_set__) - - -def wrap_act_default(param_names=None, act=None): - if param_names is None: - param_names = ["act"] - - if act is None: - act = TanhActivation() - - return wrap_param_default(param_names, lambda _: act) diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py deleted file mode 100644 index 0eeaf7eabb..0000000000 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ /dev/null @@ -1,813 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import * -from default_decorators import * - -__all__ = [ - "evaluator_base", - "classification_error_evaluator", - "auc_evaluator", - "pnpair_evaluator", - "precision_recall_evaluator", - "ctc_error_evaluator", - "chunk_evaluator", - "sum_evaluator", - "column_sum_evaluator", - "value_printer_evaluator", - "gradient_printer_evaluator", - "maxid_printer_evaluator", - "maxframe_printer_evaluator", - "seqtext_printer_evaluator", - "classification_error_printer_evaluator", - "detection_map_evaluator", -] - - -class EvaluatorAttribute(object): - FOR_CLASSIFICATION = 1 - FOR_REGRESSION = 1 << 1 - FOR_RANK = 1 << 2 - FOR_PRINT = 1 << 3 - FOR_UTILS = 1 << 4 - FOR_DETECTION = 1 << 5 - - KEYS = [ - "for_classification", "for_regression", "for_rank", "for_print", - "for_utils", "for_detection" - ] - - @staticmethod - def to_key(idx): - tmp = 1 - for i in xrange(0, len(EvaluatorAttribute.KEYS)): - if idx == tmp: - return EvaluatorAttribute.KEYS[i] - else: - tmp = (tmp << 1) - - -def evaluator(*attrs): - def impl(method): - for attr in attrs: - setattr(method, EvaluatorAttribute.to_key(attr), True) - method.is_evaluator = True - return method - - return impl - - -def evaluator_base(input, - type, - label=None, - weight=None, - name=None, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - delimited=None, - top_k=None, - excluded_chunk_types=None, - overlap_threshold=None, - background_id=None, - evaluate_difficult=None, - ap_type=None): - """ - Evaluator will evaluate the network status while training/testing. - - User can use evaluator by classify/regression job. For example. - - .. code-block:: python - - classify(prediction, output, evaluator=classification_error_evaluator) - - And user could define evaluator separately as follow. - - .. code-block:: python - - classification_error_evaluator("ErrorRate", prediction, label) - - The evaluator often contains a name parameter. It will also be printed when - evaluating network. The printed information may look like the following. - - .. 
code-block:: text - - Batch=200 samples=20000 AvgCost=0.679655 CurrentCost=0.662179 Eval: - classification_error_evaluator=0.4486 - CurrentEval: ErrorRate=0.3964 - - :param input: Input layers, an object of LayerOutput or a list of - LayerOutput. - :type input: list|LayerOutput - :param label: An input layer containing the ground truth label. - :type label: LayerOutput|None - :param weight: An input layer which is a weight for each sample. - Each evaluator may use this weight differently in its calculation. - :type weight: LayerOutput. - :param top_k: number k in top-k error rate - :type top_k: int - :param overlap_threshold: Overlap threshold used in detection tasks to filter - detection results - :type overlap_threshold: float - :param background_id: Identifier of the background class - :type background_id: int - :param evaluate_difficult: Whether to evaluate difficult objects - :type evaluate_difficult: bool - :param ap_type: How to calculate average precision - :type ap_type: str - """ - # inputs type assertions. - assert classification_threshold is None or isinstance( - classification_threshold, float) - assert positive_label is None or isinstance(positive_label, int) - assert num_results is None or isinstance(num_results, int) - assert top_k is None or isinstance(top_k, int) - - if not isinstance(input, list): - input = [input] - - if label: - input.append(label) - if weight: - input.append(weight) - - Evaluator( - name=name, - type=type, - inputs=[i.name for i in input], - chunk_scheme=chunk_scheme, - num_chunk_types=num_chunk_types, - classification_threshold=classification_threshold, - positive_label=positive_label, - dict_file=dict_file, - result_file=result_file, - delimited=delimited, - num_results=num_results, - top_k=top_k, - excluded_chunk_types=excluded_chunk_types, - overlap_threshold=overlap_threshold, - background_id=background_id, - evaluate_difficult=evaluate_difficult, - ap_type=ap_type) - - -@evaluator(EvaluatorAttribute.FOR_DETECTION) -@wrap_name_default() -def detection_map_evaluator(input, - label, - overlap_threshold=0.5, - background_id=0, - evaluate_difficult=False, - ap_type="11point", - name=None): - """ - Detection mAP Evaluator. It will print the mean Average Precision (mAP) for detection. - - Based on the output of the detection_output layer, the detection mAP Evaluator counts - the true positive and false positive bboxes and integrates them to get the - mAP. - - The simple usage is: - - .. code-block:: python - - eval = detection_map_evaluator(input=det_output,label=lbl) - - :param input: Input layer. - :type input: LayerOutput - :param label: Label layer. - :type label: LayerOutput - :param overlap_threshold: The bbox overlap threshold of a true positive. - :type overlap_threshold: float - :param background_id: The background class index. - :type background_id: int - :param evaluate_difficult: Whether to evaluate difficult ground truth. - :type evaluate_difficult: bool - """ - if not isinstance(input, list): - input = [input] - - if label: - input.append(label) - - evaluator_base( - name=name, - type="detection_map", - input=input, - label=label, - overlap_threshold=overlap_threshold, - background_id=background_id, - evaluate_difficult=evaluate_difficult, - ap_type=ap_type) - - -@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) -@wrap_name_default() -def classification_error_evaluator(input, - label, - name=None, - weight=None, - top_k=None, - threshold=None): - """ - Classification Error Evaluator. It will print the error rate for classification. - - The classification error is: - - ..
math:: - - classification\\_error = \\frac{NumOfWrongPredicts}{NumOfAllSamples} - - The simple usage is: - - .. code-block:: python - - eval = classification_error_evaluator(input=prob,label=lbl) - - :param name: Evaluator name. - :type name: basestring - :param input: Input Layer name. The output prediction of network. - :type input: LayerOutput - :param label: Label layer name. - :type label: basestring - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1], which is simply multiplied into NumOfWrongPredicts - and NumOfAllSamples. If the elements of weight are all one, it is - equivalent to not setting weight. The larger the weight, the more - important the sample. - :type weight: LayerOutput - :param top_k: number k in top-k error rate - :type top_k: int - :param threshold: The classification threshold. - :type threshold: float - :return: None. - """ - - evaluator_base( - name=name, - type="classification_error", - input=input, - label=label, - weight=weight, - top_k=top_k, - classification_threshold=threshold, ) - - -@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) -@wrap_name_default() -def auc_evaluator( - input, - label, - name=None, - weight=None, ): - """ - AUC Evaluator, which is suited to binary classification. - - The simple usage: - - .. code-block:: python - - eval = auc_evaluator(input, label) - - :param name: Evaluator name. - :type name: None|basestring - :param input: Input Layer name. The output prediction of network. - :type input: LayerOutput - :param label: Label layer name. - :type label: None|basestring - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1]. - :type weight: LayerOutput - """ - evaluator_base( - name=name, - type="last-column-auc", - input=input, - label=label, - weight=weight) - - -@evaluator(EvaluatorAttribute.FOR_RANK) -@wrap_name_default() -def pnpair_evaluator( - input, - label, - query_id, - weight=None, - name=None, ): - """ - Positive-negative pair rate Evaluator, which is suited to ranking tasks such - as learning to rank. This evaluator must take at least three layers as input. - - The simple usage: - - .. code-block:: python - - eval = pnpair_evaluator(input, label, query_id) - - :param input: Input Layer name. The output prediction of network. - :type input: LayerOutput - :param label: Label layer name. - :type label: LayerOutput - :param query_id: Query_id layer name. Query_id indicates which query - each sample belongs to. Its shape should be - the same as the output of the Label layer. - :type query_id: LayerOutput - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1] which indicates the weight of each sample. - The default weight of a sample is 1 if the weight layer is None, - and the pair weight is the mean of the two samples' weights. - :type weight: LayerOutput - :param name: Evaluator name. - :type name: None|basestring - """ - if not isinstance(input, list): - input = [input] - if label: - input.append(label) - if query_id: - input.append(query_id) - evaluator_base( - input=input, - type="pnpair", - weight=weight, - name=name, ) - - -@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) -@wrap_name_default() -def precision_recall_evaluator( - input, - label, - positive_label=None, - weight=None, - name=None, ): - """ - An Evaluator to calculate precision, recall, and F1-score. - It is adapted to tasks with multiple labels. - - - If positive_label=-1, it will print the average precision, recall, - F1-score of all labels.
- - - If a specific positive_label is given, it will print the precision, recall, - F1-score of this label. - - The simple usage: - - .. code-block:: python - - eval = precision_recall_evaluator(input, label) - - :param name: Evaluator name. - :type name: None|basestring - :param input: Input Layer name. The output prediction of network. - :type input: LayerOutput - :param label: Label layer name. - :type label: LayerOutput - :param positive_label: The label of the positive class. - :type positive_label: int|None - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1]. (TODO: explanation) - :type weight: LayerOutput - """ - evaluator_base( - name=name, - type="precision_recall", - input=input, - label=label, - positive_label=positive_label, - weight=weight) - - -@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) -@wrap_name_default() -def ctc_error_evaluator( - input, - label, - name=None, ): - """ - This evaluator calculates the sequence-to-sequence edit distance. - - The simple usage is: - - .. code-block:: python - - eval = ctc_error_evaluator(input=input, label=lbl) - - :param name: Evaluator name. - :type name: None|basestring - :param input: Input Layer. Should be the same as the input for ctc_layer. - :type input: LayerOutput - :param label: input label, which is a data_layer. Should be the same as the - label for ctc_layer - :type label: LayerOutput - """ - evaluator_base( - name=name, type="ctc_edit_distance", input=input, label=label) - - -@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) -@wrap_name_default() -def chunk_evaluator( - input, - label, - chunk_scheme, - num_chunk_types, - name=None, - excluded_chunk_types=None, ): - """ - Chunk evaluator is used to evaluate segment labelling accuracy for a - sequence. It calculates precision, recall and F1 scores for chunk detection. - - To use chunk evaluator, several concepts need to be clarified first. - - * **Chunk type** is the type of the whole chunk, and a chunk consists of one or several words. (For example in NER, ORG for organization name, PER for person name etc.) - - * **Tag type** indicates the position of a word in a chunk. (B for begin, I for inside, E for end, S for single) - We can name a label by combining tag type and chunk type. (i.e. B-ORG for the beginning of an organization name) - - The construction of the label dictionary should obey the following rules: - - - Use one of the listed labelling schemes. These schemes differ in the way chunk boundaries are indicated. - - .. code-block:: text - - Scheme Description - plain Use the same label for the whole chunk. - IOB Two labels for chunk type X, B-X for chunk beginning and I-X for chunk inside. - IOE Two labels for chunk type X, E-X for chunk ending and I-X for chunk inside. - IOBES Four labels for chunk type X, B-X for chunk beginning, I-X for chunk inside, E-X for chunk end and S-X for single word chunk. - - To make it clear, let's illustrate with an NER example. - Assuming that there are three named entity types including ORG, PER and LOC which are called 'chunk type' here, - if the 'IOB' scheme were used, the label set would be extended to a set including B-ORG, I-ORG, B-PER, I-PER, B-LOC, I-LOC and O, - in which B-ORG stands for the beginning of ORG and I-ORG for the inside of ORG. - Prefixes which are called 'tag type' here are added to chunk types and there are two tag types including B and I. - Of course, the training data should be labeled accordingly. - - - The mapping is done by the equations and assignment protocol listed below.
- - The following equations extract the tag type and chunk type from a label. - - .. code-block:: text - - tagType = label % numTagType - chunkType = label / numTagType - otherChunkType = numChunkTypes - - The following table shows the mapping between tagType values and tag types in each scheme. - - .. code-block:: text - - Scheme Begin Inside End Single - plain 0 - - - - IOB 0 1 - - - IOE - 0 1 - - IOBES 0 1 2 3 - - Continuing the NER example, the label dict should look like this to satisfy the above equations: - - .. code-block:: text - - B-ORG 0 - I-ORG 1 - B-PER 2 - I-PER 3 - B-LOC 4 - I-LOC 5 - O 6 - - In this example, chunkType has three values: 0 for ORG, 1 for PER, 2 for LOC; because the scheme is - "IOB", tagType has two values: 0 for B and 1 for I. - Here we will use I-LOC to explain the above mapping rules in detail. - For I-LOC, the label id is 5, so we can get tagType=1 and chunkType=2, which means I-LOC is a part of NER chunk LOC - and the tag is I. - - The simple usage is: - - .. code-block:: python - - eval = chunk_evaluator(input, label, chunk_scheme, num_chunk_types) - - - :param input: The input layers. - :type input: LayerOutput - :param label: An input layer containing the ground truth label. - :type label: LayerOutput - :param chunk_scheme: The labelling scheme. Four types are supported: - "IOB", "IOE", "IOBES" and "plain". It is required. - :type chunk_scheme: basestring - :param num_chunk_types: number of chunk types other than "other" - :param name: The Evaluator name, it is optional. - :type name: basestring|None - :param excluded_chunk_types: chunks of these types are not considered - :type excluded_chunk_types: list of integer|None - """ - evaluator_base( - name=name, - type="chunk", - input=input, - label=label, - chunk_scheme=chunk_scheme, - num_chunk_types=num_chunk_types, - excluded_chunk_types=excluded_chunk_types, ) - - -@evaluator(EvaluatorAttribute.FOR_UTILS) -@wrap_name_default() -def sum_evaluator( - input, - name=None, - weight=None, ): - """ - An Evaluator that sums the result of the input. - - The simple usage: - - .. code-block:: python - - eval = sum_evaluator(input) - - :param name: Evaluator name. - :type name: None|basestring - :param input: Input Layer name. - :type input: LayerOutput - :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1]. (TODO: explanation) - :type weight: LayerOutput - """ - evaluator_base(name=name, type="sum", input=input, weight=weight) - - -@evaluator(EvaluatorAttribute.FOR_UTILS) -@wrap_name_default() -def column_sum_evaluator( - input, - name=None, - weight=None, ): - """ - This Evaluator is used to sum the last column of the input. - - The simple usage is: - - .. code-block:: python - - eval = column_sum_evaluator(input) - - :param name: Evaluator name. - :type name: None|basestring - :param input: Input Layer name. - :type input: LayerOutput - """ - evaluator_base( - name=name, type="last-column-sum", input=input, weight=weight) - - -""" -The following are printer Evaluators, which are usually used to -print results such as the values or gradients of input layers, the -results generated in machine translation, the classification error, etc. -""" - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def value_printer_evaluator( - input, - name=None, ): - """ - This Evaluator is used to print the values of input layers. It takes - one or more input layers. - - The simple usage is: - - ..
code-block:: python - - eval = value_printer_evaluator(input) - - :param input: One or more input layers. - :type input: LayerOutput|list - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base(name=name, type="value_printer", input=input) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def gradient_printer_evaluator( - input, - name=None, ): - """ - This Evaluator is used to print the gradient of input layers. It takes - one or more input layers. - - The simple usage is: - - .. code-block:: python - - eval = gradient_printer_evaluator(input) - - :param input: One or more input layers. - :type input: LayerOutput|list - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base(name=name, type="gradient_printer", input=input) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def maxid_printer_evaluator( - input, - num_results=None, - name=None, ): - """ - This Evaluator is used to print the top k maximum values and their indexes - for each row of the input layers. It takes one or more input layers. - k is specified by num_results. - - The simple usage is: - - .. code-block:: python - - eval = maxid_printer_evaluator(input) - - :param input: Input Layer name. - :type input: LayerOutput|list - :param num_results: This number is used to specify the top k numbers. - It is 1 by default. - :type num_results: int. - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base( - name=name, type="max_id_printer", input=input, num_results=num_results) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def maxframe_printer_evaluator( - input, - num_results=None, - name=None, ): - """ - This Evaluator is used to print the top k frames of each input layer. - The input layers should contain sequence info or be of a sequence type. - k is specified by num_results. - It takes one or more input layers. - - Note: - The width of each frame is 1. - - The simple usage is: - - .. code-block:: python - - eval = maxframe_printer_evaluator(input) - - :param input: Input Layer name. - :type input: LayerOutput|list - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base( - name=name, - type="max_frame_printer", - input=input, - num_results=num_results) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def seqtext_printer_evaluator( - input, - result_file, - id_input=None, - dict_file=None, - delimited=None, - name=None, ): - """ - The sequence text printer will print text according to an index matrix and a - dictionary. There can be multiple inputs to this layer: - - 1. If there is no id_input, the input must be a matrix containing - the sequence of indices; - - 2. If there is id_input, it should be ids, which are interpreted as sample ids. - - The output format will be: - - 1. Sequence without sub-sequence, with probability: - - .. code-block:: python - - id \t prob space_separated_tokens_from_dictionary_according_to_seq - - 2. Sequence without sub-sequence, without probability: - - .. code-block:: python - - id \t space_separated_tokens_from_dictionary_according_to_seq - - 3. Sequence with sub-sequence, without probability: - - .. code-block:: python - - id \t space_separated_tokens_from_dictionary_according_to_sub_seq - \t \t space_separated_tokens_from_dictionary_according_to_sub_seq - ... - - Typically, the SequenceTextPrinter layer takes the output of maxid or a RecurrentGroup - with maxid (when generating) as its input.
- - The simple usage is: - - .. code-block:: python - - eval = seqtext_printer_evaluator(input=maxid_layer, - id_input=sample_id, - dict_file=dict_file, - result_file=result_file) - - :param input: Input Layer name. - :type input: LayerOutput|list - :param result_file: Path of the file to store the generated results. - :type result_file: basestring - :param id_input: Index of the input sequence; the specified index will - be printed in the generated results. This is an optional - parameter. - :type id_input: LayerOutput - :param dict_file: Path of the dictionary. This is an optional parameter. - Every line is a word in the dictionary with - (line number - 1) as the word index. - If this parameter is set to None, or to an empty string, - only word indices are printed in the generated results. - :type dict_file: basestring - :param delimited: Whether to use space to separate output tokens. - Default is True. No space is added if set to False. - :type delimited: bool - :param name: Evaluator name. - :type name: None|basestring - :return: The seq_text_printer that prints the generated sequence to a file. - :rtype: evaluator - """ - assert isinstance(result_file, basestring) - if id_input is None: - inputs = [input] - else: - inputs = [id_input, input] - input.parents.append(id_input) - - evaluator_base( - name=name, - type="seq_text_printer", - input=inputs, - dict_file=dict_file, - result_file=result_file, - delimited=delimited) - - -@evaluator(EvaluatorAttribute.FOR_PRINT) -@wrap_name_default() -def classification_error_printer_evaluator( - input, - label, - threshold=0.5, - name=None, ): - """ - This Evaluator is used to print the classification error of each sample. - - The simple usage is: - - .. code-block:: python - - eval = classification_error_printer_evaluator(input) - - :param input: Input layer. - :type input: LayerOutput - :param label: Input label layer. - :type label: LayerOutput - :param name: Evaluator name. - :type name: None|basestring - """ - evaluator_base( - name=name, - type="classification_error_printer", - input=input, - label=label, - classification_threshold=threshold) diff --git a/python/paddle/trainer_config_helpers/layer_math.py b/python/paddle/trainer_config_helpers/layer_math.py deleted file mode 100644 index ee84188bac..0000000000 --- a/python/paddle/trainer_config_helpers/layer_math.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
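Taken together, the evaluators above are attached to a network inside a trainer config. A minimal sketch, assuming prob and lbl are LayerOutput objects defined elsewhere in the config:

.. code-block:: python

    # attach several of the evaluators defined above to the same outputs
    classification_error_evaluator(name="ErrorRate", input=prob, label=lbl)
    precision_recall_evaluator(input=prob, label=lbl)
    value_printer_evaluator(input=prob)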
- -from .layers import LayerOutput, mixed_layer, identity_projection, \ - slope_intercept_layer, scaling_layer, repeat_layer -from .attrs import is_compatible_with -from .default_decorators import * -import activations as act -from paddle.trainer.config_parser import logger - -__all__ = [] - - -def register_unary_math_op(op_name, act): - def op(input, name=None): - return mixed_layer( - input=[identity_projection(input=input)], name=name, act=act) - - op = wrap_name_default(op_name)(op) - op.__doc__ = type(act).__doc__ - globals()[op_name] = op - __all__.append(op_name) - - -register_unary_math_op('exp', act.ExpActivation()) -register_unary_math_op('log', act.LogActivation()) -register_unary_math_op('abs', act.AbsActivation()) -register_unary_math_op('sigmoid', act.SigmoidActivation()) -register_unary_math_op('tanh', act.TanhActivation()) -register_unary_math_op('square', act.SquareActivation()) -register_unary_math_op('relu', act.ReluActivation()) -register_unary_math_op('sqrt', act.SqrtActivation()) -register_unary_math_op('reciprocal', act.ReciprocalActivation()) - - -def add(layeroutput, other): - if is_compatible_with(other, float): - return slope_intercept_layer(input=layeroutput, intercept=other) - if not isinstance(other, LayerOutput): - logger.fatal("LayerOutput can only be added with" - " another LayerOutput or a number") - if layeroutput.size == other.size: - return mixed_layer(input=[ - identity_projection(input=layeroutput), - identity_projection(input=other) - ]) - if other.size != 1 and layeroutput.size != 1: - logger.fatal("Two LayerOutput can be added only if they have equal size" - " or one of their sizes is 1. sizes are %s and %s" % - (layeroutput.size, other.size)) - elif layeroutput.size == 1: - tmp = layeroutput - layeroutput = other - other = tmp - other = repeat_layer(other, layeroutput.size) - return mixed_layer(input=[ - identity_projection(input=layeroutput), identity_projection(input=other) - ]) - - -LayerOutput.__radd__ = add -LayerOutput.__add__ = add - - -def sub(layeroutput, other): - if is_compatible_with(other, float): - return slope_intercept_layer(input=layeroutput, intercept=-other) - if not isinstance(other, LayerOutput): - logger.fatal("LayerOutput can only be subtracted with" - " another Layeroutput or a number") - neg = slope_intercept_layer(input=other, slope=-1.0) - return add(layeroutput, neg) - - -LayerOutput.__sub__ = sub - - -def rsub(layeroutput, other): - neg = slope_intercept_layer(input=layeroutput, slope=-1.0) - return add(neg, other) - - -LayerOutput.__rsub__ = rsub - - -def mul(layeroutput, other): - if is_compatible_with(other, float): - return slope_intercept_layer(input=layeroutput, slope=other) - if not isinstance(other, LayerOutput): - logger.fatal("LayerOutput can only be multiplied with" - " another Layeroutput or a number") - elif layeroutput.size == 1: - return scaling_layer(input=other, weight=layeroutput) - elif other.size == 1: - return scaling_layer(input=layeroutput, weight=other) - else: - logger.fatal("At least one of the operand of '*' must be a number" - " or a LayerOutput with size=1") - - -LayerOutput.__mul__ = mul -LayerOutput.__rmul__ = mul diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py deleted file mode 100644 index ee34c15733..0000000000 --- a/python/paddle/trainer_config_helpers/layers.py +++ /dev/null @@ -1,7610 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import functools -import collections -import inspect - -import paddle.trainer.config_parser as cp -from paddle.trainer.config_parser import * -from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ - ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation -from .evaluators import * -from .poolings import MaxPooling, AvgPooling, MaxWithMaskPooling, BasePoolingType, \ - CudnnAvgPooling, CudnnAvgInclPadPooling, CudnnMaxPooling -from .attrs import * -from .default_decorators import * - -try: - import cPickle as pickle -except ImportError: - import six.moves.cPickle as pickle -import copy - -__all__ = [ - 'full_matrix_projection', - 'AggregateLevel', - 'ExpandLevel', - 'identity_projection', - 'dotmul_projection', - 'dotmul_operator', - 'repeat_layer', - 'seq_reshape_layer', - 'table_projection', - 'mixed_layer', - 'data_layer', - 'embedding_layer', - 'fc_layer', - 'grumemory', - 'pooling_layer', - 'lstmemory', - 'last_seq', - 'first_seq', - 'cos_sim', - 'l2_distance_layer', - 'hsigmoid', - 'conv_projection', - 'square_error_cost', - 'regression_cost', - 'classification_cost', - 'LayerOutput', - 'img_conv_layer', - 'img_pool_layer', - 'batch_norm_layer', - 'img_cmrnorm_layer', - 'addto_layer', - 'concat_layer', - 'seq_concat_layer', - 'lstm_step_layer', - 'recurrent_group', - 'memory', - 'StaticInput', - 'expand_layer', - 'scaling_layer', - 'scaling_projection', - 'power_layer', - 'interpolation_layer', - 'bilinear_interp_layer', - 'trans_layer', - 'rotate_layer', - 'sum_to_one_norm_layer', - 'row_l2_norm_layer', - 'get_output_layer', - 'LayerType', - 'context_projection', - 'beam_search', - 'maxid_layer', - 'GeneratedInput', - 'SubsequenceInput', - 'gru_step_layer', - 'gru_step_naive_layer', - 'recurrent_layer', - 'BaseGeneratedInput', - 'conv_operator', - 'conv_shift_layer', - 'tensor_layer', - 'selective_fc_layer', - 'sampling_id_layer', - 'slope_intercept_layer', - 'trans_full_matrix_projection', - 'linear_comb_layer', - 'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'BeamInput', - 'cross_entropy_over_beam', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_regression_cost', - 'huber_classification_cost', - 'block_expand_layer', - 'maxout_layer', - 'dot_prod_layer', - 'out_prod_layer', - 'printer_layer', - 'print_layer', - 'priorbox_layer', - 'cross_channel_norm_layer', - 'multibox_loss_layer', - 'detection_output_layer', - 'roi_pool_layer', - 'spp_layer', - 'pad_layer', - 'eos_layer', - 'smooth_l1_cost', - 'layer_support', - 'multiplex_layer', - 'row_conv_layer', - 'dropout_layer', - 'prelu_layer', - 'switch_order_layer', - 'gated_unit_layer', - 'crop_layer', - 'sub_nested_seq_layer', - 'clip_layer', - 'slice_projection', - 'seq_slice_layer', - 'kmax_seq_score_layer', - 'img_pool3d_layer', - 'scale_shift_layer', - 
'img_conv3d_layer', - 'resize_layer', - 'sub_seq_layer', - 'scale_sub_region_layer', - 'upsample_layer', - 'factorization_machine', -] - - -class LayerType(object): - """ - Layer type enumerations. - """ - - DATA = 'data' - MIXED_LAYER = 'mixed' - LSTMEMORY = 'lstmemory' - GRUMEMORY = 'gated_recurrent' - SEQUENCE_LAST_INSTANCE = 'seqlastins' - SEQUENCE_FIRST_INSTANCE = 'seqfirstins' - SEQUENCE_RESHAPE = 'seqreshape' - POOLING_MAX = 'max' - POOLING_AVG = 'average' - UPSAMPLE_LAYER = 'upsample' - FC_LAYER = 'fc' - COST = 'cost' - COSINE_SIM_VEC = 'cos_vm' - COSINE_SIM = 'cos' - L2_DISTANCE = 'l2_distance' - HSIGMOID = 'hsigmoid' - CONV_LAYER = 'conv' - CONVTRANS_LAYER = 'convt' - EXCONV_LAYER = 'exconv' - EXCONVTRANS_LAYER = 'exconvt' - CUDNNCONV_LAYER = 'cudnn_conv' - CUDNNCONVTRANS_LAYER = 'cudnn_convt' - POOL_LAYER = 'pool' - POOL3D_LAYER = 'pool3d' - BATCH_NORM_LAYER = 'batch_norm' - NORM_LAYER = 'norm' - SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' - ROW_L2_NORM_LAYER = 'row_l2_norm' - ADDTO_LAYER = 'addto' - - CONCAT_LAYER = 'concat' - CONCAT_PROJ_LAYER = 'concat2' - SEQUENCE_CONCAT_LAYER = 'seqconcat' - - LSTM_STEP_LAYER = 'lstm_step' - GRU_STEP_LAYER = 'gru_step' - GET_OUTPUT_LAYER = 'get_output' - - EXPAND_LAYER = 'expand' - INTERPOLATION_LAYER = 'interpolation' - BILINEAR_INTERP_LAYER = 'bilinear_interp' - POWER_LAYER = 'power' - SCALING_LAYER = 'scaling' - TRANS_LAYER = 'trans' - ROTATE_LAYER = 'rotate' - DOT_PROD_LAYER = 'dot_prod' - OUT_PROD_LAYER = 'out_prod' - FEATURE_MAP_EXPAND_LAYER = 'featmap_expand' - - MEMORY = 'memory' - MAXID_LAYER = 'maxid' - EOSID_LAYER = 'eos_id' - RECURRENT_LAYER = 'recurrent' - - CONV_SHIFT_LAYER = "conv_shift" - TENSOR_LAYER = "tensor" - SEL_FC_LAYER = "selective_fc" - SAMPLING_ID_LAYER = "sampling_id" - SLOPE_INTERCEPT_LAYER = "slope_intercept" - LINEAR_COMBINATION_LAYER = "convex_comb" - BLOCK_EXPAND = "blockexpand" - MAXOUT = "maxout" - SPP_LAYER = "spp" - PAD_LAYER = "pad" - MULTIPLEX_LAYER = "multiplex" - ROW_CONV_LAYER = "row_conv" - - PRINT_LAYER = 'print' - PRIORBOX_LAYER = 'priorbox' - MULTIBOX_LOSS_LAYER = 'multibox_loss' - DETECTION_OUTPUT_LAYER = 'detection_output' - ROI_POOL_LAYER = 'roi_pool' - - CTC_LAYER = 'ctc' - WARP_CTC_LAYER = 'warp_ctc' - CRF_LAYER = 'crf' - CRF_DECODING_LAYER = 'crf_decoding' - NCE_LAYER = 'nce' - - CONV3D_LAYER = 'conv3d' - DECONV3D_LAYER = 'deconv3d' - - RANK_COST = 'rank-cost' - LAMBDA_COST = 'lambda_cost' - HUBER_REGRESSION = 'huber_regression' - HUBER_CLASSIFICATION = 'huber_classification' - CROSS_ENTROPY = 'multi-class-cross-entropy' - CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' - CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam' - SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' - MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' - SUM_COST = 'sum_cost' - SMOOTH_L1 = 'smooth_l1' - - PRELU = 'prelu' - SWITCH_ORDER_LAYER = 'switch_order' - CROP_LAYER = 'crop' - SUB_NESTED_SEQ = 'sub_nested_seq' - CLIP_LAYER = 'clip' - SEQ_SLICE = 'seq_slice' - - KMAX_SEQ_SCORE = 'kmax_seq_score' - SCALE_SHIFT_LAYER = 'scale_shift' - - RESIZE = 'resize' - SUB_SEQ_LAYER = 'subseq' - - SCALE_SUB_REGION_LAYER = 'scale_sub_region' - - FACTORIZATION_MACHINE = 'factorization_machine' - - @staticmethod - def is_layer_type(type_name): - """ - Whether type_name is a layer type. - - :param type_name: layer type name. Because layer type enumerations are - strings. 
- :type type_name: basestring - :return: True if it is a layer_type - :rtype: bool - """ - for key in dir(LayerType): - if key.isupper(): - att = getattr(LayerType, key) - if isinstance(att, basestring) and type_name == att: - return True - return False - - -class AggregateLevel(object): - """ - PaddlePaddle supports three sequence types: - - - :code:`SequenceType.NO_SEQUENCE` means the sample is not a sequence. - - :code:`SequenceType.SEQUENCE` means the sample is a sequence. - - :code:`SequenceType.SUB_SEQUENCE` means the sample is a nested sequence, - each timestep of which is also a sequence. - - Accordingly, AggregateLevel supports two modes: - - - :code:`AggregateLevel.TO_NO_SEQUENCE` means the aggregation acts on each - timestep of a sequence, both :code:`SUB_SEQUENCE` and :code:`SEQUENCE` will - be aggregated to :code:`NO_SEQUENCE`. - - - :code:`AggregateLevel.TO_SEQUENCE` means the aggregation acts on each - sequence of a nested sequence, :code:`SUB_SEQUENCE` will be aggregated to - :code:`SEQUENCE`. - """ - TO_NO_SEQUENCE = 'non-seq' - TO_SEQUENCE = 'seq' - # compatible with previous configuration - EACH_TIMESTEP = TO_NO_SEQUENCE - EACH_SEQUENCE = TO_SEQUENCE - - -class LayerOutput(object): - """ - LayerOutput is the output of a layer function. It is used internally for several - reasons: - - - Checking that layer connections make sense. - - - For example, FC(Softmax) => Cost(MSE Error) is not a good combination. - - - Tracking layer connections. - - - Passing to layer methods as input. - - :param name: Layer output name. - :type name: basestring - :param layer_type: Current Layer Type. One of the LayerType enumeration. - :type layer_type: basestring - :param activation: Layer Activation. - :type activation: BaseActivation. - :param parents: Layer's parents. - :type parents: list | tuple | collections.Sequence - """ - - def __init__(self, - name, - layer_type, - parents=None, - activation=None, - num_filters=None, - img_norm_type=None, - size=None, - outputs=None, - reverse=None): - assert isinstance(name, basestring) - assert isinstance(layer_type, basestring) - assert size is not None - assert LayerType.is_layer_type(layer_type) - self.name = name - self.full_name = MakeLayerNameInSubmodel(name) - self.layer_type = layer_type - if parents is not None and type(parents) != list: - parents = [parents] - self.parents = [] if parents is None else parents - self.activation = activation - self.num_filters = num_filters - self.img_norm_type = img_norm_type - self.size = size - if outputs is None: - outputs = ['default'] - self.outputs = outputs - self.reverse = reverse - - @property - def width(self): - return cp.g_layer_map[self.full_name].width - - @property - def height(self): - return cp.g_layer_map[self.full_name].height - - @property - def depth(self): - return cp.g_layer_map[self.full_name].depth - - def set_input(self, input): - """ - Set the input for a memory layer.
Can only be used for memory layer - """ - assert isinstance(input, LayerOutput) - assert self.layer_type == LayerType.MEMORY - SetMemoryInput(self.name, input.name) - - -ERROR_CLIPPING = 'error_clipping_threshold' -DROPOUT = 'drop_rate' -DEVICE = 'device' - - -def layer_support(*attrs): - attrs_list = list(attrs) - attrs_list.append(DEVICE) - - def decorator(method): - @functools.wraps(method) - def wrapper(*args, **kwargs): - for attr in attrs_list: - for each in args: - if isinstance(each, ExtraLayerAttribute): - setattr(each, '_'.join(['can', attr]), True) - for key in kwargs: - val = kwargs[key] - if isinstance(val, ExtraLayerAttribute): - setattr(val, '_'.join(['can', attr]), True) - for each in args: - if isinstance(each, ExtraLayerAttribute): - each.check(method.__name__) - for key in kwargs: - val = kwargs[key] - if isinstance(val, ExtraLayerAttribute): - val.check(method.__name__) - return method(*args, **kwargs) - - if hasattr(method, 'argspec'): - wrapper.argspec = method.argspec - else: - wrapper.argspec = inspect.getargspec(method) - - return wrapper - - return decorator - - -@wrap_param_attr_default() -def full_matrix_projection(input, size=0, param_attr=None): - """ - Full Matrix Projection. It performs full matrix multiplication. - - .. math:: - out.row[i] += in.row[i] * weight - - There are two styles of usage. - - 1. When used in mixed_layer like this, you can only set the input: - - .. code-block:: python - - with mixed_layer(size=100) as m: - m += full_matrix_projection(input=layer) - - 2. When used as an independent object like this, you must set the size: - - .. code-block:: python - - proj = full_matrix_projection(input=layer, - size=100, - param_attr=ParamAttr(name='_proj')) - - :param input: The input of this layer. - :type input: LayerOutput - :param size: The dimension of this layer. - :type size: int - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: FullMatrixProjection Object. - :rtype: FullMatrixProjection - """ - proj = FullMatrixProjection( - input_layer_name=input.name, size=size, **param_attr.attr) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def trans_full_matrix_projection(input, size=0, param_attr=None): - """ - Different from full_matrix_projection, this projection performs matrix - multiplication, using the transpose of weight. - - .. math:: - out.row[i] += in.row[i] * w^\mathrm{T} - - :math:`w^\mathrm{T}` means the transpose of weight. - The simply usage is: - - .. code-block:: python - - proj = trans_full_matrix_projection(input=layer, - size=100, - param_attr=ParamAttr( - name='_proj', - initial_mean=0.0, - initial_std=0.01)) - - :param input: The input of this layer. - :type input: LayerOutput - :param size: The parameter size. Means the width of parameter. - :type size: int - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: TransposedFullMatrixProjection Object. - :rtype: TransposedFullMatrixProjection - """ - proj = TransposedFullMatrixProjection( - input_layer_name=input.name, size=size, **param_attr.attr) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def table_projection(input, size=0, param_attr=None): - """ - Table Projection. It selects rows from parameter where row\_id - is in input\_ids. - - .. 
math:: - out.row[i] += table.row[ids[i]] - - where :math:`out` is output, :math:`table` is parameter, :math:`ids` is input\_ids, - and :math:`i` is row\_id. - - There are two styles of usage. - - 1. When used in mixed_layer like this, you can only set the input: - - .. code-block:: python - - with mixed_layer(size=100) as m: - m += table_projection(input=layer) - - 2. When used as an independent object like this, you must set the size: - - .. code-block:: python - - proj = table_projection(input=layer, - size=100, - param_attr=ParamAttr(name='_proj')) - - - :param input: The input of this layer, which must contains id fields. - :type input: LayerOutput - :param size: The dimension of the output. - :type size: int - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: TableProjection Object. - :rtype: TableProjection - """ - proj = TableProjection( - input_layer_name=input.name, size=size, **param_attr.attr) - proj.origin = input - return proj - - -def identity_projection(input, offset=None, size=None): - """ - 1. If offset=None, it performs IdentityProjection as follows: - - .. math:: - out.row[i] += in.row[i] - - The example usage is: - - .. code-block:: python - - proj = identity_projection(input=layer) - - - 2. If offset!=None, It executes IdentityOffsetProjection and takes the - elements of the input in the range [offset, offset+size) as output. - - .. math:: - out.row[i] += in.row[i + \\textrm{offset}] - - The example usage is: - - .. code-block:: python - - proj = identity_projection(input=layer, - offset=10) - - Note that neither of the projections have trainable parameter. - - :param input: The input of this layer. - :type input: LayerOutput - :param offset: The offset from the start of the input. The input's - elements in the range [offset, offset+size) will be - taken as output. If this parameter is not set or set - to None, the output will be the same as the input. - :type offset: int - :param size: The dimension of this layer. It will be neglected - when offset is None or not set. - :type size: int - :return: IdentityProjection or IdentityOffsetProjection object - :rtype: IdentityProjection | IdentityOffsetProjection - """ - if offset is None: - proj = IdentityProjection(input_layer_name=input.name) - proj.origin = input - else: - if size is None: - size = input.size - offset - proj = IdentityOffsetProjection( - input_layer_name=input.name, offset=offset, size=size) - proj.origin = input - return proj - - -def slice_projection(input, slices): - """ - slice_projection slices the input value into multiple parts, - then selects and merges some of them into a new output. - - .. math:: - output = [input.slices()] - - The example usage is: - - .. code-block:: python - - proj = slice_projection(input=layer, slices=[(0, 10), (20, 30)]) - - Note that slice_projection has no trainable parameter. - - :param input: The input of this layer. - :type input: LayerOutput - :param slices: A list of start and end offsets of each slice. - :type slices: list of tuple - :return: SliceProjection object. - :rtype: SliceProjection - """ - assert len(slices) >= 1 - start = 0 - for i in xrange(len(slices)): - assert len(slices[i]) == 2 - # The start position of the next slice needs to be greater than - # or equal to the end position of the previous slice. 
- assert slices[i][0] >= start - assert slices[i][1] >= slices[i][0] - start = slices[i][1] - proj = SliceProjection(input_layer_name=input.name, slices=slices) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def scaling_projection(input, param_attr=None): - """ - scaling_projection multiplies the input with a scalar parameter. - - .. math:: - out += w * in - - The example usage is: - - .. code-block:: python - - proj = scaling_projection(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: ScalingProjection object. - :rtype: ScalingProjection - """ - proj = ScalingProjection(input_layer_name=input.name, **param_attr.attr) - proj.origin = input - return proj - - -@wrap_param_attr_default() -def dotmul_projection(input, param_attr=None): - """ - DotMulProjection takes a layer as input and performs - element-wise multiplication with weight. - - .. math:: - out.row[i] += in.row[i] .* weight - - where :math:`.*` means element-wise multiplication. - - The example usage is: - - .. code-block:: python - - proj = dotmul_projection(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: DotMulProjection object. - :rtype: DotMulProjection - """ - proj = DotMulProjection( - input_layer_name=input.name, size=input.size, **param_attr.attr) - proj.origin = input - return proj - - -def dotmul_operator(a=None, b=None, scale=1, **kwargs): - """ - DotMulOperator takes two inputs and performs element-wise multiplication: - - .. math:: - out.row[i] += scale * (a.row[i] .* b.row[i]) - - where :math:`.*` means element-wise multiplication, and - scale is a config scalar, whose default value is 1. - - The example usage is: - - .. code-block:: python - - op = dotmul_operator(a=layer1, b=layer2, scale=0.5) - - :param a: The first input of this layer. - :type a: LayerOutput - :param b: The second input of this layer. - :type b: LayerOutput - :param scale: A scalar to scale the product. Its default value is 1. - :type scale: float - :return: DotMulOperator object. - :rtype: DotMulOperator - """ - if 'x' in kwargs or 'y' in kwargs: - logger.warning('x and y arguments for dotmul_operator are deprecated. ' - 'Please use a and b as parameters.') - a = kwargs.get('x', a) # For backward compatibility. - b = kwargs.get('y', b) - assert isinstance(a, LayerOutput) - assert isinstance(b, LayerOutput) - if a.size is not None and b.size is not None: - assert a.size == b.size - - op = DotMulOperator(input_layer_names=[a.name, b.name], scale=scale) - op.origin = [a, b] - return op - - -@wrap_bias_attr_default(['padding_attr']) -def context_projection(input, - context_len, - context_start=None, - padding_attr=False): - """ - Context Projection. - - It just reorganizes the input sequence: starting from context_start, it - combines "context_len" consecutive elements of the sequence into one context. - "context_start" will be set to -(context_len - 1) / 2 by default. When a - context position is out of the sequence length, the padding will be filled - with zeros if padding_attr = False; otherwise, the padding is trainable. - - For example, if the original sequence is [A B C D E F G], context_len is 3, and - padding_attr is not set, then after context projection the sequence will - be [ 0AB ABC BCD CDE DEF EFG FG0 ].
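-
-    The example usage is (a minimal sketch; `layer` is assumed to be a
-    LayerOutput that carries a sequence):
-
-    .. code-block:: python
-
-        # Each output step concatenates 3 consecutive input steps,
-        # zero-padded at the sequence boundaries since padding_attr
-        # defaults to False.
-        context = context_projection(input=layer, context_len=3)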
- - :param input: The input of this layer, which should be a sequence. - :type input: LayerOutput - :param context_len: The length of the context. - :type context_len: int - :param context_start: The start position of the context. The default value is - -(context_len - 1)/2 - :type context_start: int - :param padding_attr: Parameter attribute of the padding. If the parameter is - set to False, padding will be zero. In other cases, the - padding is trainable, and its parameter attribute is set - by this parameter. - :type padding_attr: bool | ParameterAttribute - :return: Projection object. - :rtype: Projection - """ - context_start = -( - context_len - 1) / 2 if context_start is None else context_start - - extra_dict = dict() - trainable = isinstance(padding_attr, ParameterAttribute) - if trainable: - extra_dict = padding_attr.attr - - proj = ContextProjection( - input_layer_name=input.name, - context_length=context_len, - context_start=context_start, - trainable_padding=trainable, - **extra_dict) - proj.origin = input - return proj - - -class MixedLayerType(LayerOutput): - """ - The internal object for trainer_helpers. - """ - - class AddToSealedMixedLayerException(Exception): - def __init__(self): - Exception.__init__(self) - - def __init__(self, name, size, act, bias_attr, layer_attr, parents=None): - """ - :param name: The name of this layer. - :type name: basestring - :param size: The dimension of this layer. - :type size: int - :param act: Activation type. - :type act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - """ - LayerOutput.__init__( - self, - name, - LayerType.MIXED_LAYER, - parents, - size=size, - activation=act) - self.bias_attr = bias_attr - self.layer_attr = layer_attr - self.inputs = [] - self.finalized = False - - def __iadd__(self, other): - """ - + += operator - :param other: Other projection. - :type other: Projection - :return: self. - :rtype: MixedLayerType - """ - if not self.finalized: - assert isinstance(other, Projection) or isinstance(other, Operator) - self.inputs.append(other) - if isinstance(other, Projection): - self.parents.append(other.origin) - else: - self.parents.extend(other.origin) - return self - else: - raise MixedLayerType.AddToSealedMixedLayerException() - - def __enter__(self): - assert len(self.inputs) == 0 - return self - - def __exit__(self, exc_type, exc_value, tb): - if exc_value is not None: - raise exc_value - assert len(self.inputs) != 0 - ml = MixedLayer( - name=self.name, - size=self.size, - active_type=self.activation.name, - bias=ParamAttr.to_bias(self.bias_attr), - inputs=self.inputs, - **ExtraLayerAttribute.to_kwargs(self.layer_attr)) - # update the size which might be computed inside MixedLayer - # according to the operator's output size - self.size = ml.config.size - self.finalized = True - - -@wrap_name_default("mixed") -@wrap_act_default(act=LinearActivation()) -@wrap_bias_attr_default(has_bias=False) -@layer_support(ERROR_CLIPPING, DROPOUT) -def mixed_layer(size=0, - input=None, - name=None, - act=None, - bias_attr=False, - layer_attr=None): - """ - Mixed Layer. A mixed layer will add all inputs together, then activate the sum. 
- Each input is a projection or operator. - - There are two styles of usage. - - 1. When the parameter input is not set, use mixed_layer like this: - - .. code-block:: python - - with mixed_layer(size=256) as m: - m += full_matrix_projection(input=layer1) - m += identity_projection(input=layer2) - - 2. You can also set all inputs when invoking mixed_layer as follows: - - .. code-block:: python - - m = mixed_layer(size=256, - input=[full_matrix_projection(input=layer1), - full_matrix_projection(input=layer2)]) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param size: The dimension of this layer. - :type size: int - :param input: The input of this layer. It is an optional parameter. - :param act: Activation Type. LinearActivation is the default activation. - :type act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: MixedLayerType object. - :rtype: MixedLayerType - """ - - if input is None: - return MixedLayerType(name, size, act, bias_attr, layer_attr) - else: - with mixed_layer( - name=name, - size=size, - act=act, - bias_attr=bias_attr, - layer_attr=layer_attr) as m: - if isinstance(input, collections.Sequence): - for each in input: - m += each - else: - m += input - return m - - -@layer_support() -def data_layer(name, size, depth=None, height=None, width=None, - layer_attr=None): - """ - Define a DataLayer for the neural network. - - The example usage is: - - .. code-block:: python - - data = data_layer(name="input", size=1000) - - :param name: The name of this layer. - :type name: basestring - :param size: The dimension of this data layer. - :type size: int - :param depth: The depth of the input image data. - :type depth: int | None - :param height: The height of the input image data. - :type height: int | None - :param width: The width of the input image data. - :type width: int | None - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - type=LayerType.DATA, - name=name, - size=size, - depth=depth, - height=height, - width=width, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - if depth is None: - depth = 1 - num_filters = None - if height is not None and width is not None: - num_filters = size / (width * height * depth) - assert num_filters * width * height * depth == size, \ - "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) - - return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) - - -@wrap_name_default("embedding") -@wrap_param_attr_default() -@layer_support(ERROR_CLIPPING, DROPOUT) -def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): - """ - Define an embedding layer. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer, whose type must be Index Data. - :type input: LayerOutput - :param size: The dimension of the embedding vector. - :type size: int - :param param_attr: The embedding parameter attribute. See ParameterAttribute - for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. 
- :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - with mixed_layer( - name=name, - size=size, - act=LinearActivation(), - bias_attr=False, - layer_attr=layer_attr) as mix: - mix += table_projection(input=input, size=size, param_attr=param_attr) - return mix - - -@wrap_name_default() -@wrap_param_attr_default() -@wrap_bias_attr_default() -@wrap_act_default() -@layer_support(ERROR_CLIPPING, DROPOUT) -def fc_layer(input, - size, - act=None, - name=None, - param_attr=None, - bias_attr=None, - layer_attr=None): - """ - The fully connected layer. - - The example usage is: - - .. code-block:: python - - fc = fc_layer(input=layer, - size=1024, - act=LinearActivation(), - bias_attr=False) - - which is equal to: - - .. code-block:: python - - with mixed_layer(size=1024) as fc: - fc += full_matrix_projection(input=layer) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput | list | tuple - :param size: The dimension of this layer. - :type size: int - :param act: Activation Type. TanhActivation is the default activation. - :type act: BaseActivation - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - assert not isinstance(param_attr, collections.Sequence) - param_attr = [param_attr] - else: - if isinstance(param_attr, collections.Sequence): - assert len(input) == len(param_attr) - else: - if "parameter_name" in param_attr.attr and len(input) > 1: - logger.fatal( - "When the name field of param_attr is manually specified " - "and the input is a list, the param_attr should also be a " - "list with each item being the param_attr for each input " - "item. If only one named param_attr is provided, all the " - "input items would share this parameter.") - param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))] - - assert isinstance(input, collections.Sequence) - - Layer( - inputs=[ - Input(ipt.name, **attr.attr) for ipt, attr in zip(input, param_attr) - ], - name=name, - type=LayerType.FC_LAYER, - size=size, - bias=ParamAttr.to_bias(bias_attr), - active_type=act.name, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.FC_LAYER, input, activation=act, size=size) - - -@wrap_name_default("print") -def printer_layer(input, format=None, name=None): - """ - Print the output value of the layers specified by the parameter input. - This layer is useful for debugging. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput | list | tuple - :return: LayerOutput object. 
- :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - assert isinstance(input, collections.Sequence) # list or tuple - for each in input: - assert isinstance(each, LayerOutput) - - Layer( - name=name, - format=format, - type=LayerType.PRINT_LAYER, - inputs=[l.name for l in input], ) - # This layer doesn't return anything and cannot be used as the input of another layer. - -# Keep print_layer for compatibility with V1 API. -# 'print_layer' does not work for V2 API because it will be changed to -# 'print' for V2 API. But 'print' is a reserved keyword in Python. - - -print_layer = printer_layer - - -@wrap_name_default("priorbox") -def priorbox_layer(input, - image, - aspect_ratio, - variance, - min_size, - max_size=[], - name=None): - """ - Compute the priorbox and set the variance. This layer is necessary for SSD. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param image: The network input image. - :type image: LayerOutput - :param aspect_ratio: The aspect ratio. - :type aspect_ratio: list - :param variance: The bounding box variance. - :param min_size: The minimum size of the priorbox width/height. - :type min_size: list - :param max_size: The maximum size of the priorbox width/height. It may be empty. - :type max_size: list - :return: LayerOutput object. - :rtype: LayerOutput - """ - # plus one for ratio 1. - num_filters = (len(aspect_ratio) * 2 + 1 + len(max_size)) * 4 - size = (input.size / input.num_filters) * num_filters * 2 - Layer( - name=name, - type=LayerType.PRIORBOX_LAYER, - inputs=[input.name, image.name], - size=size, - min_size=min_size, - max_size=max_size, - aspect_ratio=aspect_ratio, - variance=variance) - return LayerOutput( - name, - LayerType.PRIORBOX_LAYER, - parents=[input, image], - num_filters=num_filters, - size=size) - - -@wrap_name_default("multibox_loss") -def multibox_loss_layer(input_loc, - input_conf, - priorbox, - label, - num_classes, - overlap_threshold=0.5, - neg_pos_ratio=3.0, - neg_overlap=0.5, - background_id=0, - name=None): - """ - Compute the location loss and the confidence loss for SSD. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input_loc: The input predicted locations. - :type input_loc: LayerOutput | List of LayerOutput - :param input_conf: The input priorbox confidence. - :type input_conf: LayerOutput | List of LayerOutput - :param priorbox: The input priorbox location and the variance. - :type priorbox: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param num_classes: The number of classes. - :type num_classes: int - :param overlap_threshold: The threshold of the overlap. - :type overlap_threshold: float - :param neg_pos_ratio: The ratio of the negative bounding box to - the positive bounding box. - :type neg_pos_ratio: float - :param neg_overlap: The negative bounding box overlap threshold. - :type neg_overlap: float - :param background_id: The background class index. - :type background_id: int - :return: LayerOutput object. 
- :rtype: LayerOutput - """ - if isinstance(input_loc, LayerOutput): - input_loc = [input_loc] - assert isinstance(input_loc, collections.Sequence) # list or tuple - for each in input_loc: - assert isinstance(each, LayerOutput) - input_loc_num = len(input_loc) - - if isinstance(input_conf, LayerOutput): - input_conf = [input_conf] - assert isinstance(input_conf, collections.Sequence) # list or tuple - for each in input_conf: - assert isinstance(each, LayerOutput) - input_conf_num = len(input_conf) - # Check the input layer number. - assert input_loc_num == input_conf_num - - inputs = [priorbox.name, label.name] - inputs.extend([l.name for l in input_loc]) - inputs.extend([l.name for l in input_conf]) - parents = [priorbox, label] - parents.extend(input_loc) - parents.extend(input_conf) - - Layer( - name=name, - type=LayerType.MULTIBOX_LOSS_LAYER, - inputs=inputs, - input_num=input_loc_num, - num_classes=num_classes, - overlap_threshold=overlap_threshold, - neg_pos_ratio=neg_pos_ratio, - neg_overlap=neg_overlap, - background_id=background_id) - return LayerOutput( - name, LayerType.MULTIBOX_LOSS_LAYER, parents=parents, size=1) - - -@wrap_name_default("detection_output") -def detection_output_layer(input_loc, - input_conf, - priorbox, - num_classes, - nms_threshold=0.45, - nms_top_k=400, - keep_top_k=200, - confidence_threshold=0.01, - background_id=0, - name=None): - """ - Apply the NMS to the output of network and compute the predict bounding - box location. The output's shape of this layer could be zero if there is - no valid bounding box. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input_loc: The input predict locations. - :type input_loc: LayerOutput | List of LayerOutput. - :param input_conf: The input priorbox confidence. - :type input_conf: LayerOutput | List of LayerOutput. - :param priorbox: The input priorbox location and the variance. - :type priorbox: LayerOutput - :param num_classes: The number of the classes. - :type num_classes: int - :param nms_threshold: The Non-maximum suppression threshold. - :type nms_threshold: float - :param nms_top_k: The bounding boxes number kept of the NMS's output. - :type nms_top_k: int - :param keep_top_k: The bounding boxes number kept of the layer's output. - :type keep_top_k: int - :param confidence_threshold: The classification confidence threshold. - :type confidence_threshold: float - :param background_id: The background class index. - :type background_id: int - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input_loc, LayerOutput): - input_loc = [input_loc] - assert isinstance(input_loc, collections.Sequence) # list or tuple - for each in input_loc: - assert isinstance(each, LayerOutput) - input_loc_num = len(input_loc) - - if isinstance(input_conf, LayerOutput): - input_conf = [input_conf] - assert isinstance(input_conf, collections.Sequence) # list or tuple - for each in input_conf: - assert isinstance(each, LayerOutput) - input_conf_num = len(input_conf) - - # Check the input layer number. 
- assert input_loc_num == input_conf_num - - inputs = [priorbox.name] - inputs.extend([l.name for l in input_loc]) - inputs.extend([l.name for l in input_conf]) - parents = [priorbox] - parents.extend(input_loc) - parents.extend(input_conf) - - size = keep_top_k * 7 - - Layer( - name=name, - type=LayerType.DETECTION_OUTPUT_LAYER, - inputs=inputs, - size=size, - input_num=input_loc_num, - num_classes=num_classes, - nms_threshold=nms_threshold, - nms_top_k=nms_top_k, - keep_top_k=keep_top_k, - confidence_threshold=confidence_threshold, - background_id=background_id) - return LayerOutput( - name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size) - - -@wrap_name_default("roi_pool") -def roi_pool_layer(input, - rois, - pooled_width, - pooled_height, - spatial_scale, - num_channels=None, - name=None): - """ - A layer used by Fast R-CNN to extract feature maps of ROIs from the last - feature map. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input layer. - :type input: LayerOutput. - :param rois: The input ROIs' data. - :type rois: LayerOutput. - :param pooled_width: The width after pooling. - :type pooled_width: int - :param pooled_height: The height after pooling. - :type pooled_height: int - :param spatial_scale: The spatial scale between the image and feature map. - :type spatial_scale: float - :param num_channels: The number of the input channels. - :type num_channels: int - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - size = num_channels * pooled_width * pooled_height - Layer( - name=name, - type=LayerType.ROI_POOL_LAYER, - inputs=[input.name, rois.name], - pooled_width=pooled_width, - pooled_height=pooled_height, - spatial_scale=spatial_scale, - num_channels=num_channels) - return LayerOutput( - name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) - - -@wrap_name_default("cross_channel_norm") -def cross_channel_norm_layer(input, name=None, param_attr=None): - """ - Normalize a layer's output. This layer is necessary for ssd. This - layer applys normalization across the channels of each sample to - a convolutional layer's output and scales the output by a group of - trainable factors whose dimensions equal to the channel's number. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert input.num_filters is not None - Layer( - name=name, - type=LayerType.NORM_LAYER, - inputs=[ - Input( - input.name, - norm=Norm( - norm_type="cross-channel-norm", - channels=input.num_filters, - size=input.size, - scale=0, - pow=0, - blocked=0), - **param_attr.attr) - ]) - return LayerOutput( - name, - LayerType.NORM_LAYER, - parents=input, - num_filters=input.num_filters, - size=input.size) - - -@wrap_name_default("seq_pooling") -@wrap_bias_attr_default(has_bias=False) -@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling()) -@layer_support() -def pooling_layer(input, - pooling_type=None, - name=None, - bias_attr=None, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - stride=-1, - layer_attr=None): - """ - Pooling layer for sequence inputs, not used for Image. 
- - If stride > 0, this layer slides a window whose size is determined by stride, - and returns the pooling value of the sequence in the window as the output. Thus, - a long sequence will be shortened. Note that for sequences with sub-sequences, the - default value of stride is -1. - - The example usage is: - - .. code-block:: python - - seq_pool = pooling_layer(input=layer, - pooling_type=AvgPooling(), - agg_level=AggregateLevel.TO_NO_SEQUENCE) - - :param agg_level: AggregateLevel.TO_NO_SEQUENCE or - AggregateLevel.TO_SEQUENCE - :type agg_level: AggregateLevel - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param pooling_type: Type of pooling. MaxPooling is the default pooling. - :type pooling_type: BasePoolingType | None - :param stride: The step size between successive pooling regions. - :type stride: int - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - extra_dict = dict() - # noinspection PyUnresolvedReferences - if isinstance(pooling_type, AvgPooling): - extra_dict['average_strategy'] = pooling_type.strategy - elif isinstance(pooling_type, MaxPooling) and \ - pooling_type.output_max_index is not None: - assert isinstance(pooling_type.output_max_index, bool) - extra_dict['output_max_index'] = pooling_type.output_max_index - extra_dict.update(ExtraLayerAttribute.to_kwargs(layer_attr)) - - if agg_level == AggregateLevel.TO_SEQUENCE: - assert stride == -1 - - Layer( - name=name, - type=pooling_type.name, - inputs=[Input(input.name)], - bias=ParamAttr.to_bias(bias_attr), - trans_type=agg_level, - stride=stride, - **extra_dict) - - return LayerOutput( - name, pooling_type.name, parents=[input], size=input.size) - - -@wrap_bias_attr_default() -@wrap_param_attr_default() -@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) -@wrap_act_default(param_names=["act", 'state_act'], act=TanhActivation()) -@wrap_name_default("lstmemory") -@layer_support() -def lstmemory(input, - name=None, - size=None, - reverse=False, - act=None, - gate_act=None, - state_act=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - Long Short-term Memory Cell. - - The memory cell is implemented by the following equations. - - .. math:: - - i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) - - f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) - - c_t & = f_t c_{t-1} + i_t \\tanh(W_{xc}x_{t} + W_{hc}h_{t-1} + b_c) - - o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) - - h_t & = o_t \\tanh(c_t) - - - NOTE: In PaddlePaddle's implementation, the multiplications - :math:`W_{xi}x_{t}` , :math:`W_{xf}x_{t}`, - :math:`W_{xc}x_t`, :math:`W_{xo}x_{t}` are not done in the lstmemory layer, - so an additional mixed_layer with full_matrix_projection or a fc_layer must - be included in the configuration file to complete the input-to-hidden - mappings before lstmemory is called. - - NOTE: This is a low level user interface. You can use network.simple_lstm - to configure a simple plain LSTM layer. 
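-
-    The example usage is (a minimal sketch; `data` is assumed to be a
-    LayerOutput and `hidden_dim` the desired cell dimension; the mixed_layer
-    below supplies the input-to-hidden mapping that lstmemory itself does
-    not perform, so the lstmemory input size is hidden_dim * 4 and its
-    output size is hidden_dim):
-
-    .. code-block:: python
-
-        with mixed_layer(size=hidden_dim * 4) as m:
-            m += full_matrix_projection(input=data)
-        lstm = lstmemory(input=m)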
- - Reference: - `Generating Sequences With Recurrent Neural Networks - `_ - - :param name: The name of this layer. It is optional. - :type name: basestring - :param size: DEPRECATED. The dimension of the LSTM cell. - :type size: int - :param input: The input of this layer. - :type input: LayerOutput - :param reverse: Whether the input sequence is processed in a reverse order. - :type reverse: bool - :param act: Activation type. TanhActivation is the default activation. - :type act: BaseActivation - :param gate_act: Activation type of this layer's gates. SigmoidActivation is the - default activation. - :type gate_act: BaseActivation - :param state_act: Activation type of the state. TanhActivation is the default activation. - :type state_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert gate_act.support_hppl - assert state_act.support_hppl - assert act.support_hppl - assert input.size is not None and input.size % 4 == 0 - - if size is not None: - if input.size / 4 == size: - plog = logger.warning - else: - plog = logger.fatal - plog("size of lstmemory layer: %s is automatically set to " - "size of input layer / 4. The parameter size passed to " - "this layer is ignored." % (name)) - - Layer( - name=name, - type=LayerType.LSTMEMORY, - active_type=act.name, - active_state_type=state_act.name, - active_gate_type=gate_act.name, - reversed=reverse, - bias=ParamAttr.to_bias(bias_attr), - inputs=[Input(input.name, **param_attr.attr)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - LayerType.LSTMEMORY, [input], - size=input.size / 4, - reverse=reverse) - - -@wrap_bias_attr_default() -@wrap_param_attr_default() -@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) -@wrap_act_default(param_names=["act"], act=TanhActivation()) -@wrap_name_default("gru") -@layer_support() -def grumemory(input, - size=None, - name=None, - reverse=False, - act=None, - gate_act=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - Gated Recurrent Unit Layer. - - The memory cell is implemented by the following equations. - - 1. update gate :math:`z`: defines how much of the previous memory to - keep around when the unit updates its activation. The update gate - is computed by: - - .. math:: - - z_t = \\sigma(W_{z}x_{t} + U_{z}h_{t-1} + b_z) - - 2. reset gate :math:`r`: determines how to combine the new input with the - previous memory. The reset gate is computed similarly to the update gate: - - .. math:: - - r_t = \\sigma(W_{r}x_{t} + U_{r}h_{t-1} + b_r) - - 3. The candidate activation :math:`\\tilde{h_t}` is computed similarly to - that of the traditional recurrent unit: - - .. math:: - - {\\tilde{h_t}} = \\tanh(W x_{t} + U (r_{t} \odot h_{t-1}) + b) - - 4. The hidden activation :math:`h_t` of the GRU at time t is a linear - interpolation between the previous activation :math:`h_{t-1}` and the - candidate activation :math:`\\tilde{h_t}`: - - .. 
math:: - - h_t = (1 - z_t) h_{t-1} + z_t {\\tilde{h_t}} - - NOTE: In PaddlePaddle's implementation, the multiplication operations - :math:`W_{r}x_{t}`, :math:`W_{z}x_{t}` and :math:`W x_t` are not performed - in gate_recurrent layer. Consequently, an additional mixed_layer with - full_matrix_projection or a fc_layer must be included before grumemory - is called. - - Reference: - `Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling - `_ - - The simple usage is: - - .. code-block:: python - - gru = grumemory(input) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput. - :param size: DEPRECATED. The dimension of the gru cell. - :type size: int - :param reverse: Whether the input sequence is processed in a reverse order. - :type reverse: bool - :param act: Activation type, TanhActivation is the default. This activation - affects the :math:`{\\tilde{h_t}}`. - :type act: BaseActivation - :param gate_act: Activation type of this layer's two gates. SigmoidActivation is - the default activation. This activation affects the :math:`z_t` - and :math:`r_t`. It is the :math:`\\sigma` in the above formula. - :type gate_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert act.support_hppl - assert gate_act.support_hppl - assert input.size is not None and input.size % 3 == 0 - if size is not None: - if input.size / 3 == size: - plog = logger.warning - else: - plog = logger.fatal - plog("size of grumemory layer: %s is automatically set to " - "size of input layer / 3. The parameter size passing to this " - "layer is ignored." % (name)) - - Layer( - name=name, - type=LayerType.GRUMEMORY, - active_type=act.name, - active_gate_type=gate_act.name, - reversed=reverse, - bias=ParamAttr.to_bias(bias_attr), - inputs=[Input(input.name, **param_attr.attr)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - LayerType.GRUMEMORY, [input], - size=input.size / 3, - reverse=reverse) - - -@wrap_name_default() -@layer_support() -def last_seq(input, - name=None, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - stride=-1, - layer_attr=None): - """ - Get Last Timestamp Activation of a sequence. - - If stride > 0, this layer will slide a window whose size is determined by stride, - and return the last value of the sequence in the window as the output. Thus, a - long sequence will be shortened. Note that for sequence with sub-sequence, the - default value of stride is -1. - - The simple usage is: - - .. code-block:: python - - seq = last_seq(input=layer) - - :param agg_level: Aggregated level - :type agg_level: AggregateLevel - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param stride: The step size between successive pooling regions. - :type stride: int - :param layer_attr: The extra layer attribute. 
See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - if input.reverse is not None and input.reverse: - logger.warning("You are getting the last instance of a sequence that" - " is a output of a REVERSED layer. There is no time" - " series information at all. Maybe you want to use" - " first_seq instead.") - - if agg_level == AggregateLevel.TO_SEQUENCE: - assert stride == -1 - - Layer( - name=name, - type=LayerType.SEQUENCE_LAST_INSTANCE, - inputs=[input.name], - trans_type=agg_level, - stride=stride, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.SEQUENCE_LAST_INSTANCE, - parents=[input], - size=input.size) - - -@wrap_name_default() -@layer_support() -def first_seq(input, - name=None, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - stride=-1, - layer_attr=None): - """ - Get First Timestamp Activation of a sequence. - - If stride > 0, this layer will slide a window whose size is determined by stride, - and return the first value of the sequence in the window as the output. Thus, a - long sequence will be shortened. Note that for sequence with sub-sequence, the - default value of stride is -1. - - The simple usage is: - - .. code-block:: python - - seq = first_seq(input=layer) - - :param agg_level: aggregation level - :type agg_level: AggregateLevel - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param stride: The step size between successive pooling regions. - :type stride: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - if input.reverse is not None and not input.reverse: - logger.warning('You are getting the first instance for a time series,' - ' and it is a normal recurrent layer output. There is no' - ' time series information at all. Maybe you want to use' - ' last_seq instead.') - - if agg_level == AggregateLevel.TO_SEQUENCE: - assert stride == -1 - - Layer( - name=name, - type=LayerType.SEQUENCE_FIRST_INSTANCE, - inputs=[input.name], - trans_type=agg_level, - stride=stride, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.SEQUENCE_FIRST_INSTANCE, - parents=[input], - size=input.size) - - -class ExpandLevel(object): - """ - Please refer to AggregateLevel first. - - ExpandLevel supports two modes: - - - :code:`ExpandLevel.FROM_NO_SEQUENCE` means the expansion acts on - :code:`NO_SEQUENCE`, which will be expanded to - :code:`SEQUENCE` or :code:`SUB_SEQUENCE`. - - - :code:`ExpandLevel.FROM_SEQUENCE` means the expansion acts on - :code:`SEQUENCE`, which will be expanded to - :code:`SUB_SEQUENCE`. - """ - FROM_NO_SEQUENCE = AggregateLevel.TO_NO_SEQUENCE - FROM_SEQUENCE = AggregateLevel.TO_SEQUENCE - # compatible with previous configuration - FROM_TIMESTEP = FROM_NO_SEQUENCE - - -@wrap_name_default() -@layer_support() -def expand_layer(input, - expand_as, - name=None, - bias_attr=False, - expand_level=ExpandLevel.FROM_NO_SEQUENCE, - layer_attr=None): - """ - A layer for expanding dense data or (sequence data where the length of each - sequence is one) to sequence data. - - The example usage is: - - .. code-block:: python - - expand = expand_layer(input=layer1, - expand_as=layer2, - expand_level=ExpandLevel.FROM_NO_SEQUENCE) - - :param input: The input of this layer. 
- :type input: LayerOutput - :param expand_as: Expand the input according to this layer's sequence information. - After the operation, the expanded input will have the same number of - elements as this layer. - :type expand_as: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param expand_level: Whether the input layer is a sequence or the element of a sequence. - :type expand_level: ExpandLevel - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - Layer( - inputs=[input.name, expand_as.name], - name=name, - bias=ParamAttr.to_bias(bias_attr=bias_attr), - type=LayerType.EXPAND_LAYER, - trans_type=expand_level, - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - size=input.size, - layer_type=LayerType.EXPAND_LAYER, - parents=[input, expand_as]) - - -@wrap_name_default() -@wrap_act_default(act=IdentityActivation()) -@layer_support() -def repeat_layer(input, - num_repeats, - as_row_vector=True, - act=None, - name=None, - layer_attr=None): - """ - A layer for repeating the input num_repeats times. - - If as_row_vector: - - .. math:: - y = [x_1,\cdots, x_n, \cdots, x_1, \cdots, x_n] - - If not as_row_vector: - - .. math:: - y = [x_1,\cdots, x_1, \cdots, x_n, \cdots, x_n] - - - The example usage is: - - .. code-block:: python - - expand = repeat_layer(input=layer, num_repeats=4) - - :param input: The input of this layer. - :type input: LayerOutput - :param num_repeats: The number of times to repeat the input. - :type num_repeats: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param as_row_vector: Whether to treat the input as row vectors or not. If - the parameter is set to True, the repeating operation - will be performed in the column direction. Otherwise, - it will be performed in the row direction. - :type as_row_vector: bool - :param act: Activation type. IdentityActivation is the default activation. - :type act: BaseActivation - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - l = Layer( - inputs=[input.name], - name=name, - active_type=act.name, - num_filters=num_repeats, - as_row_vector=as_row_vector, - type=LayerType.FEATURE_MAP_EXPAND_LAYER, - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - size=l.config.size, - layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER, - activation=act, - parents=[input]) - - -@wrap_name_default("seqreshape") -@wrap_act_default(act=IdentityActivation()) -@wrap_bias_attr_default(has_bias=False) -@layer_support(ERROR_CLIPPING, DROPOUT) -def seq_reshape_layer(input, - reshape_size, - act=None, - name=None, - layer_attr=None, - bias_attr=None): - """ - A layer for reshaping the sequence. Assume the input sequence has T instances, - the dimension of each instance is M, and the input reshape_size is N, then the - output sequence has T*M/N instances, the dimension of each instance is N. - - Note that T*M/N must be an integer. - - The example usage is: - - .. 
code-block:: python - - reshape = seq_reshape_layer(input=layer, reshape_size=4) - - :param input: The input of this layer. - :type input: LayerOutput - :param reshape_size: The dimension of the reshaped sequence. - :type reshape_size: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param act: Activation type. IdentityActivation is the default activation. - :type act: BaseActivation - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :return: LayerOutput object. - :rtype: LayerOutput - """ - - Layer( - inputs=[input.name], - name=name, - size=reshape_size, - type=LayerType.SEQUENCE_RESHAPE, - bias=ParamAttr.to_bias(bias_attr), - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - size=reshape_size, - layer_type=LayerType.SEQUENCE_RESHAPE, - parents=[input]) - - -@wrap_name_default() -@layer_support() -def interpolation_layer(input, weight, name=None, layer_attr=None): - """ - This layer performs linear interpolation on two inputs, - which is used in NEURAL TURING MACHINE. - - .. math:: - y.row[i] = w[i] * x_1.row[i] + (1 - w[i]) * x_2.row[i] - - where :math:`x_1` and :math:`x_2` are two (batchSize x dataDim) inputs, - :math:`w` is (batchSize x 1) weight vector, and :math:`y` is - (batchSize x dataDim) output. - - The example usage is: - - .. code-block:: python - - interpolation = interpolation_layer(input=[layer1, layer2], weight=layer3) - - :param input: The input of this layer. - :type input: list | tuple - :param weight: Weight layer. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, collections.Sequence) - assert len(input) == 2 - assert isinstance(input[0], LayerOutput) and isinstance(input[1], - LayerOutput) - if input[0].size is not None and input[1].size is not None: - assert input[0].size == input[1].size - assert isinstance(weight, LayerOutput) - if weight.size is not None: - assert weight.size == 1 - Layer( - name=name, - type=LayerType.INTERPOLATION_LAYER, - inputs=[weight.name, input[0].name, input[1].name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.INTERPOLATION_LAYER, - parents=[weight, input[0], input[1]], - size=input[0].size) - - -@wrap_name_default() -@layer_support() -def bilinear_interp_layer(input, - out_size_x=None, - out_size_y=None, - name=None, - layer_attr=None): - """ - This layer implements bilinear interpolation on convolutional layer's output. - - Please refer to Wikipedia: https://en.wikipedia.org/wiki/Bilinear_interpolation - - The simple usage is: - - .. code-block:: python - - bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64) - - :param input: The input of this layer. - :type input: LayerOutput. - :param out_size_x: The width of the output. - :type out_size_x: int - :param out_size_y: The height of the output. - :type out_size_y: int - :param name: The name of this layer. It is optional. 
- :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert input.layer_type == LayerType.CONV_LAYER - assert isinstance(input.activation, LinearActivation) - assert out_size_x > 0 and out_size_y > 0 - assert input.num_filters is not None - num_channels = input.num_filters - l = Layer( - name=name, - inputs=Input( - input.name, - bilinear_interp=BilinearInterp( - out_size_x=out_size_x, - out_size_y=out_size_y, - channels=num_channels)), - type=LayerType.BILINEAR_INTERP_LAYER, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.BILINEAR_INTERP_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -@wrap_name_default() -@layer_support() -def power_layer(input, weight, name=None, layer_attr=None): - """ - This layer applies a power function to a vector element-wise, - which is used in NEURAL TURING MACHINE. - - .. math:: - y = x^w - - where :math:`x` is an input vector, :math:`w` is a scalar exponent, - and :math:`y` is an output vector. - - The example usage is: - - .. code-block:: python - - power = power_layer(input=layer1, weight=layer2) - - :param input: The input of this layer. - :type input: LayerOutput - :param weight: The exponent of the power. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) and isinstance(weight, LayerOutput) - if weight.size is not None: - assert weight.size == 1 - Layer( - name=name, - type=LayerType.POWER_LAYER, - inputs=[weight.name, input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.POWER_LAYER, parents=[input, weight], size=input.size) - - -@wrap_name_default() -@layer_support() -def scaling_layer(input, weight, name=None, layer_attr=None): - """ - A layer for multiplying input vector by weight scalar. - - .. math:: - y = w x - - where :math:`x` is size=dataDim input, :math:`w` is size=1 weight, - and :math:`y` is size=dataDim output. - - Note that the above computation is for one sample. Multiple samples are - processed in one batch. - - The example usage is: - - .. code-block:: python - - scale = scaling_layer(input=layer1, weight=layer2) - - :param input: The input of this layer. - :type input: LayerOutput - :param weight: The weight of each sample. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(weight, LayerOutput) and isinstance(input, LayerOutput) - if weight.size is not None: - assert weight.size == 1 - Layer( - name=name, - type=LayerType.SCALING_LAYER, - inputs=[weight.name, input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.SCALING_LAYER, parents=[weight, input], size=input.size) - - -@wrap_name_default() -@layer_support() -def trans_layer(input, name=None, layer_attr=None): - """ - A layer for transposing a minibatch matrix. - - .. 
math:: - y = x^\mathrm{T} - - where :math:`x` is (M x N) input, and :math:`y` is (N x M) output. - - The example usage is: - - .. code-block:: python - - trans = trans_layer(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.TRANS_LAYER, - inputs=[input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.TRANS_LAYER, parents=[input], size=input.size) - - -@wrap_name_default() -@layer_support() -def rotate_layer(input, height, width, name=None, layer_attr=None): - """ - A layer for rotating 90 degrees (clock-wise) for each feature channel, - usually used when the input sample is some image or feature map. - - .. math:: - y(j,i,:) = x(M-i-1,j,:) - - where :math:`x` is (M x N x C) input, and :math:`y` is (N x M x C) output. - - The example usage is: - - .. code-block:: python - - rot = rotate_layer(input=layer, - height=100, - width=100) - - :param input: The input of this layer. - :type input: LayerOutput - :param height: The height of the sample matrix. - :type height: int - :param width: The width of the sample matrix. - :type width: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - l = Layer( - name=name, - height=height, - width=width, - type=LayerType.ROTATE_LAYER, - inputs=[input.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.ROTATE_LAYER, - parents=[input], - size=l.config.size) - - -@wrap_name_default() -@layer_support() -def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): - """ - Cosine Similarity Layer. The cosine similarity equation is here. - - .. math:: - similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b} - \\over \\|\\mathbf{a}\\| \\|\\mathbf{b}\\|} - - The size of a is M, size of b is M*N, - Similarity will be calculated N times by step M. The output size is - N. The scale will be multiplied to similarity. - - Note that the above computation is for one sample. Multiple samples are - processed in one batch. - - The example usage is: - - .. code-block:: python - - cos = cos_sim(a=layer1, b=layer2, size=3) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param a: The first input of this layer. - :type a: LayerOutput - :param b: The second input of this layer. - :type b: LayerOutput - :param scale: The scale of the cosine similarity. 1 is the default value. - :type scale: float - :param size: The dimension of this layer. NOTE size_a * size should equal size_b. - :type size: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. 
- :rtype: LayerOutput - """ - assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput) - if size == 1: - Layer( - name=name, - type=LayerType.COSINE_SIM, - cos_scale=scale, - inputs=[a.name, b.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - else: - if a.size is not None and b.size is not None: - assert size == b.size / a.size - Layer( - name=name, - type=LayerType.COSINE_SIM_VEC, - size=size, - cos_scale=scale, - inputs=[a.name, b.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size) - - -@wrap_name_default() -@layer_support() -def l2_distance_layer(x, y, name=None, layer_attr=None): - """ - This layer calculates and returns the Euclidean distance between two input - vectors x and y. The equation is as follows: - - .. math:: - l2_distance(\\mathbf{x}, \\mathbf{y}) = \\sqrt{\\sum_{i=1}^{D}(x_i - y_i)^2} - - The output size of this layer is fixed to be 1. Note that the above - computation is for one sample. Multiple samples are processed in one batch. - - The example usage is: - - .. code-block:: python - - l2_sim = l2_distance_layer(x=layer1, y=layer2) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param x: The first input x for this layer, whose output is a matrix with - dimensionality N x D. N is the sample number in a mini-batch. - D is the dimensionality of x's output. - :type x: LayerOutput - :param y: The second input y for this layer, whose output is a matrix with - dimensionality N x D. N is the sample number in a mini-batch. - D is the dimensionality of y's output. - :type y: LayerOutput - :param layer_attr: The extra layer attributes, for example, drop rate. - See ExtraLayerAttribute for more details. - :type layer_attr: ExtraLayerAttribute - :return: The returned LayerOutput object. - :rtype: LayerOutput - """ - - assert isinstance(x, LayerOutput) and isinstance(y, LayerOutput) - Layer( - name=name, - type=LayerType.L2_DISTANCE, - inputs=[x.name, y.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.L2_DISTANCE, parents=[x, y], size=1) - - -@wrap_name_default() -@wrap_bias_attr_default(has_bias=True) -@wrap_param_attr_default() -@layer_support() -def hsigmoid(input, - label, - num_classes=None, - name=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - Organize the classes into a binary tree. At each node, a sigmoid function - is used to calculate the probability of belonging to the right branch. - - Reference: - `Hierarchical Probabilistic Neural Network Language Model - `_ - - The example usage is: - - .. code-block:: python - - cost = hsigmoid(input=[layer1, layer2], - label=data_layer) - - :param input: The input of this layer. - :type input: LayerOutput | list | tuple - :param label: The input label. - :type label: LayerOutput - :param num_classes: The number of classes. It should be larger than 2. If the parameter - is not set or set to None, its actual value will be automatically set to - the number of labels. - :type num_classes: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. 
- :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - if not isinstance(param_attr, collections.Sequence): - param_attr = [param_attr] - else: - if not isinstance(param_attr, collections.Sequence): - param_attr = [param_attr] * len(input) - else: - assert len(param_attr) == len(input) - - assert isinstance(input, collections.Sequence) - assert isinstance(label, LayerOutput) - assert label.layer_type == LayerType.DATA - - if num_classes is None: - num_classes = label.size - if num_classes is None or num_classes <= 2: - raise ValueError("hsigmoid label size must larger than 2.") - - ipts_for_layer = [] - parents = [] - for each_input, each_param_attr in zip(input, param_attr): - assert isinstance(each_input, LayerOutput) - ipts_for_layer.append(Input(each_input.name, **each_param_attr.attr)) - parents.append(each_input) - ipts_for_layer.append(label.name) - parents.append(label) - - l = Layer( - name=name, - type=LayerType.HSIGMOID, - num_classes=num_classes, - bias=ParamAttr.to_bias(bias_attr), - inputs=ipts_for_layer, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.HSIGMOID, parents=parents, size=l.config.size) - - -@wrap_name_default("conv") -@wrap_param_attr_default() -@wrap_bias_attr_default() -@wrap_act_default(act=ReluActivation()) -@layer_support(DROPOUT) -def img_conv_layer(input, - filter_size, - num_filters, - name=None, - num_channels=None, - act=None, - groups=1, - stride=1, - padding=0, - dilation=1, - bias_attr=None, - param_attr=None, - shared_biases=True, - layer_attr=None, - filter_size_y=None, - stride_y=None, - padding_y=None, - dilation_y=None, - trans=False, - layer_type=None): - """ - Convolution layer for image. Paddle can support both square and non-square - input currently. - - The details of convolution layer, please refer UFLDL's `convolution - `_ . - - Convolution Transpose (deconv) layer for image. Paddle can support both square - and non-square input currently. - - The details of convolution transpose layer, - please refer to the following explanation and references therein - `_ . - The num_channel means input image's channel number. It may be 1 or 3 when - input is raw pixels of image(mono or RGB), or it may be the previous layer's - num_filters. - - There are several groups of filters in PaddlePaddle implementation. - If the groups attribute is greater than 1, for example groups=2, - the input will be splitted into 2 parts along the channel axis, and - the filters will also be splitted into 2 parts. The first half of the filters - is only connected to the first half of the input channels, while the second - half of the filters is only connected to the second half of the input. After - the computation of convolution for each part of input, - the output will be obtained by concatenating the two results. - - The details of grouped convolution, please refer to: - `ImageNet Classification With Deep Convolutional Neural Networks - `_ - - The example usage is: - - .. code-block:: python - - conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. 
- :type input: LayerOutput - :param filter_size: The dimensions of the filter kernel. If the parameter is - set to one integer, the two dimensions on the x and y axes - will be the same when filter_size_y is not set. If it is set - to a list, the first element indicates the dimension on - the x axis, and the second is used to specify the dimension - on the y axis when filter_size_y is not provided. - :type filter_size: int | tuple | list - :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter - is not set, it will be set automatically according to filter_size. - :type filter_size_y: int - :param num_filters: The number of filters. It is the same as the number of output channels. - :type num_filters: int - :param act: Activation type. ReluActivation is the default activation. - :type act: BaseActivation - :param groups: The group number. 1 is the default group number. - :type groups: int - :param stride: The strides. If the parameter is set to one integer, the strides - on the x and y axes will be the same when stride_y is not set. If it is - set to a list, the first element indicates the stride on the x axis, - and the second is used to specify the stride on the y axis when - stride_y is not provided. 1 is the default value. - :type stride: int | tuple | list - :param stride_y: The stride on the y axis. - :type stride_y: int - :param padding: The padding sizes. If the parameter is set to one integer, the padding - sizes on the x and y axes will be the same when padding_y is not set. If it - is set to a list, the first element indicates the padding size on the - x axis, and the second is used to specify the padding size on the y axis - when padding_y is not provided. 0 is the default padding size. - :type padding: int | tuple | list - :param padding_y: The padding size on the y axis. - :type padding_y: int - :param dilation: The dimensions of the dilation. If the parameter is set to one integer, - the two dimensions on the x and y axes will be the same when dilation_y is not - set. If it is set to a list, the first element indicates the dimension - on the x axis, and the second is used to specify the dimension on the y - axis when dilation_y is not provided. 1 is the default dimension. - :type dilation: int | tuple | list - :param dilation_y: The dimension of the dilation on the y axis. - :type dilation_y: int - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channel number of the input. - :type num_channels: int - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param shared_biases: Whether biases will be shared between filters or not. - :type shared_biases: bool - :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param trans: True if it is a convTransLayer, False if it is a convLayer - :type trans: bool - :param layer_type: Specify the layer type. If the dilation's dimension on one axis is - larger than 1, layer_type has to be "cudnn_conv" or "cudnn_convt".
- If trans=True, layer_type has to be "exconvt" or "cudnn_convt", - otherwise layer_type has to be either "exconv" or "cudnn_conv". - :type layer_type: basestring - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if filter_size_y is None: - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 2 - filter_size, filter_size_y = filter_size - else: - filter_size_y = filter_size - - if stride_y is None: - if isinstance(stride, collections.Sequence): - assert len(stride) == 2 - stride, stride_y = stride - else: - stride_y = stride - - if padding_y is None: - if isinstance(padding, collections.Sequence): - assert len(padding) == 2 - padding, padding_y = padding - else: - padding_y = padding - - if dilation_y is None: - if isinstance(dilation, collections.Sequence): - assert len(dilation) == 2 - dilation, dilation_y = dilation - else: - dilation_y = dilation - - if param_attr.attr.get('initial_smart'): - # special initial for conv layers. - init_w = (2.0 / (filter_size**2 * num_channels))**0.5 - param_attr.attr["initial_mean"] = 0.0 - param_attr.attr["initial_std"] = init_w - param_attr.attr["initial_strategy"] = 0 - param_attr.attr["initial_smart"] = False - - if layer_type: - if dilation > 1 or dilation_y > 1: - assert layer_type in [ - "cudnn_conv", "cudnn_convt", "exconv", "exconvt" - ] - if trans: - assert layer_type in ["exconvt", "cudnn_convt"] - else: - assert layer_type in ["exconv", "cudnn_conv"] - lt = layer_type - else: - lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER - - l = Layer( - name=name, - inputs=Input( - input.name, - conv=Conv( - filter_size=filter_size, - padding=padding, - dilation=dilation, - stride=stride, - channels=num_channels, - groups=groups, - filter_size_y=filter_size_y, - padding_y=padding_y, - dilation_y=dilation_y, - stride_y=stride_y), - **param_attr.attr), - active_type=act.name, - num_filters=num_filters, - bias=ParamAttr.to_bias(bias_attr), - shared_biases=shared_biases, - type=lt, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - lt, - parents=[input], - activation=act, - num_filters=num_filters, - size=l.config.size) - - -@wrap_name_default("pool") -@layer_support() -def img_pool_layer(input, - pool_size, - name=None, - num_channels=None, - pool_type=None, - stride=1, - padding=0, - layer_attr=None, - pool_size_y=None, - stride_y=None, - padding_y=None, - ceil_mode=True, - exclude_mode=None): - """ - Image pooling Layer. - - The details of pooling layer, please refer to ufldl's pooling_ . - - .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ - - - ceil_mode=True: - - .. math:: - - w & = 1 + ceil(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + ceil(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - - ceil_mode=False: - - .. math:: - - w & = 1 + floor(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + floor(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - The example usage is: - - .. code-block:: python - - maxpool = img_pool_layer(input=conv, - pool_size=3, - pool_size_y=5, - num_channels=8, - stride=1, - stride_y=2, - padding=1, - padding_y=2, - pool_type=MaxPooling()) - - :param padding: The padding size on the x axis. 0 is the default padding size. - :type padding: int - :param padding_y: The padding size on the y axis. 
If the parameter is not set - or set to None, it will be set to 'padding' automatically. - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param pool_size: The pooling window length on the x axis. - :type pool_size: int - :param pool_size_y: The pooling window length on the y axis. If the parameter is - not set or set to None, its actual value will be automatically - set to pool_size. - :type pool_size_y: int - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :type num_channels: int - :param pool_type: Pooling type. MaxPooling is the default pooling. - :type pool_type: BasePoolingType - :param stride: The stride on the x axis. 1 is the default value. - :type stride: int - :param stride_y: The stride on the y axis. If the parameter is not set or set to - None, its actual value will be automatically set to 'stride'. - :type stride_y: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Whether to use the ceil function to calculate output height and width. - True is the default. If it is set to False, the floor function will - be used. - :type ceil_mode: bool - :param exclude_mode: Whether to exclude the padding cells when calculating, but only - work when pool_type is AvgPooling. If None, also exclude the padding - cells. If use cudnn, use CudnnAvgPooling or CudnnAvgInclPadPooling - as pool_type to identify the mode. - :type exclude_mode: bool - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if pool_type is None: - pool_type = MaxPooling() - elif isinstance(pool_type, AvgPooling): - pool_type.name = 'avg' - - assert type(pool_type) in [AvgPooling, MaxPooling, MaxWithMaskPooling, CudnnAvgPooling, - CudnnMaxPooling, CudnnAvgInclPadPooling], \ - "only (Cudnn)AvgPooling, (Cudnn)MaxPooling, MaxWithMaskPooling are supported" - - type_name = pool_type.name + '-projection' \ - if ( - isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ - else pool_type.name - pool_size_y = pool_size if pool_size_y is None else pool_size_y - stride_y = stride if stride_y is None else stride_y - padding_y = padding if padding_y is None else padding_y - - l = Layer( - name=name, - type=LayerType.POOL_LAYER, - inputs=[ - Input( - input.name, - pool=Pool( - pool_type=type_name, - channels=num_channels, - size_x=pool_size, - start=None, - stride=stride, - padding=padding, - size_y=pool_size_y, - stride_y=stride_y, - padding_y=padding_y)) - ], - ceil_mode=ceil_mode, - exclude_mode=exclude_mode, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.POOL_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -@wrap_name_default("pool3d") -@layer_support() -def img_pool3d_layer(input, - pool_size, - name=None, - num_channels=None, - pool_type=None, - stride=1, - padding=0, - layer_attr=None, - pool_size_y=None, - stride_y=None, - padding_y=None, - pool_size_z=None, - stride_z=None, - padding_z=None, - ceil_mode=True): - """ - Image pooling Layer. - - The details of pooling layer, please refer ufldl's pooling_ . - - .. 
_pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ - - - ceil_mode=True: - - .. math:: - - w & = 1 + ceil(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + ceil(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - d & = 1 + ceil(\\frac{input\_depth + 2 * padding\_z - pool\_size\_z}{stride\_z}) - - - ceil_mode=False: - - .. math:: - - w & = 1 + floor(\\frac{input\_width + 2 * padding - pool\_size}{stride}) - - h & = 1 + floor(\\frac{input\_height + 2 * padding\_y - pool\_size\_y}{stride\_y}) - - d & = 1 + floor(\\frac{input\_depth + 2 * padding\_z - pool\_size\_z}{stride\_z}) - - The example usage is: - - .. code-block:: python - - maxpool = img_pool3d_layer(input=conv, - pool_size=3, - num_channels=8, - stride=1, - padding=1, - pool_type=MaxPooling()) - - :param name: The name of this layer. It is optional. - :type name: basestring. - :param input: The input of this layer. - :type input: LayerOutput - :param pool_size: The pooling window lengths along three axes. If the parameter - is set to one integer, the three lengths will be the same. - :type pool_size: int | tuple | list - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the number of channels of the input. - :type num_channels: int - :param pool_type: Pooling type. MaxPooling is the default pooling. - :type pool_type: BasePoolingType - :param stride: The strides of the pooling along three axes. If the parameter - is set to one integer, the three strides will be the same. 1 is the - default value. - :type stride: int | tuple | list - :param padding: The sizes of padding along three axes. If the parameter is set to - one integer, they will be the same. 0 is the default padding size. - :type padding: int | tuple | list - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Whether to use the ceil function to calculate output height and width. - True is the default. If it is set to False, the floor function will - be used. - :type ceil_mode: bool - :return: LayerOutput object.
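A short sketch of the ceil_mode output-size formulas above, applied to one axis (pooled_width is a hypothetical helper; the same arithmetic holds for the other axes):

.. code-block:: python

    import math

    def pooled_width(input_width, padding, pool_size, stride, ceil_mode=True):
        # Mirrors the formulas above:
        #   w = 1 + ceil((input_width + 2*padding - pool_size) / stride)
        q = (input_width + 2 * padding - pool_size) / float(stride)
        return 1 + int(math.ceil(q) if ceil_mode else math.floor(q))

    assert pooled_width(7, 1, 3, 2, ceil_mode=True) == 4
    assert pooled_width(7, 1, 3, 2, ceil_mode=False) == 4
    assert pooled_width(8, 1, 3, 2, ceil_mode=True) == 5   # ceil rounds up
    assert pooled_width(8, 1, 3, 2, ceil_mode=False) == 4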
- :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if pool_type is None: - pool_type = MaxPooling() - elif isinstance(pool_type, AvgPooling): - pool_type.name = 'avg' - - type_name = pool_type.name + '-projection' \ - if ( - isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ - else pool_type.name - - if isinstance(pool_size, collections.Sequence): - assert len(pool_size) == 3 - pool_size, pool_size_y, pool_size_z = pool_size - else: - pool_size_y = pool_size - pool_size_z = pool_size - - if isinstance(stride, collections.Sequence): - assert len(stride) == 3 - stride, stride_y, stride_z = stride - else: - stride_y = stride - stride_z = stride - - if isinstance(padding, collections.Sequence): - assert len(padding) == 3 - padding, padding_y, padding_z = padding - else: - padding_y = padding - padding_z = padding - - l = Layer( - name=name, - type=LayerType.POOL3D_LAYER, - inputs=[ - Input( - input.name, - pool=Pool3d( - pool_type=type_name, - channels=num_channels, - size_x=pool_size, - start=None, - stride=stride, - padding=padding, - size_y=pool_size_y, - stride_y=stride_y, - padding_y=padding_y, - size_z=pool_size_z, - stride_z=stride_z, - padding_z=padding_z)) - ], - ceil_mode=ceil_mode, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.POOL3D_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -@wrap_name_default("upsample") -@layer_support() -def upsample_layer(input, - name=None, - scale=None, - scale_y=None, - upsample_size=None, - upsample_size_y=None, - pad_out_x=False, - pad_out_y=False, - layer_attr=None): - """ - The DePooling process. - Inputs should be a list of length 2. The first input is a layer, - and the second input should be the MaxWithMaskPoolingLayer. - - The example usage is: - - .. code-block:: python - - pool1 = paddle.v2.layer.img_pool(input=input, pool_size=2, stride=2, - pool_type=paddle.pooling.MaxWithMask()) - upsample = paddle.v2.layer.upsample(input=[layer1, pool1]) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: contains an input layer and a MaxWithMaskPoolingLayer - :type input: list | tuple | collections.Sequence - :param scale: outputSize = scale * inputSize - :type scale: int | list | tuple - :param scale_y: scale_y will be equal to scale if its value is None. - :type scale_y: int | None - :param upsample_size: specify the outputSize. - :type upsample_size: int | list | tuple. - :param upsample_size_y: specify the y dimension outputSize. - :type upsample_size_y: int. - :param pad_out_x: specify exact x dimension size. This parameter only works when scale is 2. - :type pad_out_x: bool. - :param pad_out_y: specify exact y dimension size. This parameter only works when scale is 2. - :type pad_out_y: bool. - :param layer_attr: Extra Layer Attribute. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object.
- :rtype: LayerOutput - """ - - assert (scale is not None) or (upsample_size is not None), \ - 'either scale or upsample_size must be specified' - - assert len(input) == 2, 'layer input size must be 2' - - assert input[1].layer_type == LayerType.POOL_LAYER, \ - 'the second input should be the MaxPoolWithMaskLayer' - - scale_y = scale \ - if scale is not None else scale_y - upsample_size_y = upsample_size \ - if upsample_size is not None else upsample_size_y - - layer_type = LayerType.UPSAMPLE_LAYER - - layer = Layer( - name=name, - type=layer_type, - inputs=[ - Input( - input[0].name, - upsample=Upsample(scale, scale_y, pad_out_x, pad_out_y, - upsample_size, upsample_size_y)), - Input(input[1].name) - ], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - sz = layer.config.size - - return LayerOutput(name, layer_type=layer_type, parents=input, size=sz) - - -@wrap_name_default("spp") -@layer_support() -def spp_layer(input, - name=None, - num_channels=None, - pool_type=None, - pyramid_height=None, - layer_attr=None): - """ - A layer that performs spatial pyramid pooling. - - Reference: - `Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition - `_ - - The example usage is: - - .. code-block:: python - - spp = spp_layer(input=data, - pyramid_height=2, - num_channels=16, - pool_type=MaxPooling()) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the number of channels of the input. - :type num_channels: int - :param pool_type: Pooling type. MaxPooling is the default pooling. - :type pool_type: BasePoolingType - :param pyramid_height: The pyramid height of this pooling. - :type pyramid_height: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object.
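A sketch of the output size produced by spatial pyramid pooling, assuming level i of the pyramid pools the feature map into a 2^i x 2^i grid as in the SPP paper (spp_output_size is a hypothetical helper, not part of this API):

.. code-block:: python

    def spp_output_size(num_channels, pyramid_height):
        # Each pyramid level i contributes 4**i bins per channel, so the
        # flattened output has num_channels * sum(4**i) elements.
        return num_channels * sum(4 ** i for i in range(pyramid_height))

    # pyramid_height=2 gives a 1x1 level plus a 2x2 level: 1 + 4 = 5 bins.
    assert spp_output_size(16, 2) == 16 * 5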
- :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if pool_type is None: - pool_type = MaxPooling() - elif isinstance(pool_type, AvgPooling): - pool_type.name = 'avg' - - type_name = pool_type.name - if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)): - type_name += '-projection' - - l = Layer( - name=name, - type=LayerType.SPP_LAYER, - inputs=Input( - input.name, - spp=SpatialPyramidPool( - pool_type=type_name, - channels=num_channels, - pyramid_height=pyramid_height)), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - layer_type=LayerType.SPP_LAYER, - parents=[input], - num_filters=num_channels, - size=l.config.size) - - -def __img_norm_layer__(name, input, size, norm_type, scale, power, num_channels, - blocked, layer_attr): - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - l = Layer( - name=name, - type=LayerType.NORM_LAYER, - inputs=Input( - input.name, - norm=Norm( - norm_type=norm_type, - channels=num_channels, - size=size, - scale=scale, - pow=power, - blocked=blocked)), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - layer_type=LayerType.NORM_LAYER, - parents=[input], - num_filters=num_channels, - img_norm_type=norm_type, - size=l.config.size) - - -@wrap_name_default("crmnorm") -@layer_support() -def img_cmrnorm_layer(input, - size, - scale=0.0128, - power=0.75, - name=None, - num_channels=None, - layer_attr=None): - """ - Response normalization across feature maps. - - Reference: - `ImageNet Classification with Deep Convolutional Neural Networks - `_ - - The example usage is: - - .. code-block:: python - - norm = img_cmrnorm_layer(input=net, size=5) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param size: Normalize in number of :math:`size` feature maps. - :type size: int - :param scale: The hyper-parameter. - :type scale: float - :param power: The hyper-parameter. - :type power: float - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - return __img_norm_layer__(name, input, size, "cmrnorm-projection", scale, - power, num_channels, 0, layer_attr) - - -@wrap_bias_attr_default() -@wrap_param_attr_default( - default_factory=lambda _: ParamAttr(initial_mean=1.0, initial_std=0.)) -@wrap_act_default(act=ReluActivation()) -@wrap_name_default("batch_norm") -@layer_support(DROPOUT, ERROR_CLIPPING) -def batch_norm_layer(input, - act=None, - name=None, - img3D=False, - num_channels=None, - bias_attr=None, - param_attr=None, - layer_attr=None, - batch_norm_type=None, - epsilon=1e-5, - moving_average_fraction=0.9, - use_global_stats=None, - mean_var_names=None): - """ - Batch Normalization Layer. The notation of this layer is as follows. - - :math:`x` is the input features over a mini-batch. - - .. 
math:: - - \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ mini-batch\\ mean \\\\ - \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\mu_{\\beta})^2 \\qquad &//\\ mini-batch\\ variance \\\\ - \\hat{x_i} &\\gets \\frac{x_i - \\mu_{\\beta}}{\\sqrt{\\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\\ normalize \\\\ - y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\\ scale\\ and\\ shift - - Reference: - `Batch Normalization: Accelerating Deep Network Training by Reducing - Internal Covariate Shift - `_ - - The example usage is: - - .. code-block:: python - - norm = batch_norm_layer(input=net, act=ReluActivation()) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer, on which batch normalization is performed. - :type input: LayerOutput - :param batch_norm_type: We have batch_norm, mkldnn_batch_norm and cudnn_batch_norm. - batch_norm supports CPU, MKLDNN and GPU. cudnn_batch_norm - requires a cuDNN version greater than or equal to v4 (>=v4). - But cudnn_batch_norm is faster and needs less - memory than batch_norm. mkldnn_batch_norm requires - use_mkldnn to be enabled. By default (None), we will - automatically select cudnn_batch_norm for GPU, - mkldnn_batch_norm for MKLDNN and batch_norm for CPU. - Users can specify the batch norm type. If you use - cudnn_batch_norm, we suggest you use the latest version, - such as v5.1. - :type batch_norm_type: None | string, None or "batch_norm" or "cudnn_batch_norm" - or "mkldnn_batch_norm" - :param act: Activation type. ReluActivation is the default activation. - :type act: BaseActivation - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the number of channels of the input. - :type num_channels: int - :param bias_attr: :math:`\\beta`. The bias attribute. If the parameter is set to - False or an object whose type is not ParameterAttribute, no - bias is defined. If the parameter is set to True, the bias is - initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: :math:`\\gamma`. The parameter attribute. See ParameterAttribute - for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param use_global_stats: Whether to use moving mean/variance statistics during - the testing period. If the parameter is set to None or - True, it will use moving mean/variance statistics - during testing. If the parameter is set to False, it - will use the mean and variance of the current batch - of test data. - :type use_global_stats: bool | None. - :param epsilon: The small constant added to the variance to improve numeric stability. - :type epsilon: float. - :param moving_average_fraction: Factor used in the moving average computation. - :math:`runningMean = newMean*(1-factor) + runningMean*factor` - :type moving_average_fraction: float. - :param mean_var_names: [mean name, variance name] - :type mean_var_names: string list - :return: LayerOutput object.
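A minimal numpy sketch of the four batch-normalization equations above (batch_norm_forward is a hypothetical helper; the real layer also maintains the moving statistics):

.. code-block:: python

    import numpy as np

    def batch_norm_forward(x, gamma, beta, eps=1e-5):
        # x has shape (batch, features); mirrors the equations above.
        mu = x.mean(axis=0)            # mini-batch mean
        var = x.var(axis=0)            # mini-batch variance
        x_hat = (x - mu) / np.sqrt(var + eps)   # normalize
        return gamma * x_hat + beta    # scale and shift

    x = np.random.randn(32, 8)
    y = batch_norm_forward(x, np.ones(8), np.zeros(8))
    assert np.allclose(y.mean(axis=0), 0.0, atol=1e-7)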
- :rtype: LayerOutput - """ - - if num_channels is None: - if input.num_filters is not None: - num_channels = input.num_filters - else: - num_channels = input.size - assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \ - (batch_norm_type == "mkldnn_batch_norm") or \ - (batch_norm_type == "cudnn_batch_norm") - - l = Layer( - name=name, - img3D=img3D, - inputs=Input( - input.name, image=Image(channels=num_channels), **param_attr.attr), - active_type=act.name, - type=LayerType.BATCH_NORM_LAYER, - batch_norm_type=batch_norm_type, - bias=ParamAttr.to_bias(bias_attr), - epsilon=epsilon, - moving_average_fraction=moving_average_fraction, - use_global_stats=use_global_stats, - mean_var_names=mean_var_names, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name=name, - layer_type=LayerType.BATCH_NORM_LAYER, - parents=[input], - activation=act, - num_filters=num_channels, - size=l.config.size) - - -@wrap_name_default() -@layer_support() -def sum_to_one_norm_layer(input, name=None, layer_attr=None): - """ - A layer for sum-to-one normalization, - which is used in NEURAL TURING MACHINE. - - .. math:: - out[i] = \\frac {in[i]} {\sum_{k=1}^N in[k]} - - where :math:`in` is a (batchSize x dataDim) input vector, - and :math:`out` is a (batchSize x dataDim) output vector. - - The example usage is: - - .. code-block:: python - - sum_to_one_norm = sum_to_one_norm_layer(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute - for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.SUM_TO_ONE_NORM_LAYER, - inputs=[input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size) - - -@wrap_name_default() -@layer_support() -def row_l2_norm_layer(input, name=None, layer_attr=None): - """ - A layer for L2-normalization in each row. - - .. math:: - out[i] = \\frac{in[i]} {\\sqrt{\\sum_{k=1}^N in[k]^{2}}} - - where the size of :math:`in` is (batchSize x dataDim) , - and the size of :math:`out` is a (batchSize x dataDim) . - - The example usage is: - - .. code-block:: python - - row_l2_norm_layer = row_l2_norm_layer(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute - for details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.ROW_L2_NORM_LAYER, - inputs=[input.name], - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.ROW_L2_NORM_LAYER, parents=[input], size=input.size) - - -@wrap_name_default("addto") -@wrap_act_default(act=LinearActivation()) -@wrap_bias_attr_default(has_bias=False) -@layer_support(DROPOUT, ERROR_CLIPPING) -def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): - """ - AddtoLayer. - - .. math:: - - y = f(\\sum_{i} x_i + b) - - where :math:`y` is output, :math:`x` is input, :math:`b` is bias, - and :math:`f` is activation function. - - The example usage is: - - .. 
code-block:: python - - addto = addto_layer(input=[layer1, layer2], - act=ReluActivation(), - bias_attr=False) - - This layer simply adds all input layers together, then applies the activation to - the sum. All inputs should share the same dimension, which is also the dimension - of this layer's output. - - There is no weight matrix for each input, because it is just a simple add - operation. If you want a more complicated operation before the add, please use - mixed_layer. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input layers. It could be a LayerOutput or list/tuple of - LayerOutput. - :type input: LayerOutput | list | tuple - :param act: Activation Type. LinearActivation is the default activation. - :type act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - num_filters = None - if isinstance(input, LayerOutput): - input = [input] - - assert isinstance(input, collections.Sequence) - ipts_for_layer = [] - for each_input in input: - assert isinstance(each_input, LayerOutput) - ipts_for_layer.append(Input(each_input.name)) - if each_input.num_filters is not None: - num_filters = each_input.num_filters - - l = Layer( - name=name, - type=LayerType.ADDTO_LAYER, - inputs=ipts_for_layer, - bias=ParamAttr.to_bias(bias_attr), - active_type=act.name, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - LayerType.ADDTO_LAYER, - parents=input, - activation=act, - num_filters=num_filters, - size=l.config.size) - - -@wrap_act_default(act=IdentityActivation()) -@wrap_name_default("concat") -@layer_support(DROPOUT, ERROR_CLIPPING) -def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): - """ - Concatenate all input vectors to one vector. - Inputs can be a list of LayerOutput or a list of Projection. - - The example usage is: - - .. code-block:: python - - concat = concat_layer(input=[layer1, layer2]) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input layers or projections - :type input: list | tuple | collections.Sequence - :param act: Activation type. IdentityActivation is the default activation. - :type act: BaseActivation - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object.
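A one-line numpy illustration of how addto and concat treat dimensions differently (illustration only, not the layer implementations):

.. code-block:: python

    import numpy as np

    # addto_layer adds equally sized inputs elementwise, so the output
    # keeps their common dimension; concat_layer lays inputs side by
    # side, so the output dimension is the sum of the input dimensions.
    a, b = np.ones(256), np.ones(256)
    assert (a + b).shape == (256,)                  # addto: stays 256
    assert np.concatenate([a, b]).shape == (512,)   # concat: 256 + 256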
- :rtype: LayerOutput - """ - - if isinstance(input, LayerOutput): - input = [input] - elif isinstance(input, Projection): - input = [input] - else: - assert isinstance(input, collections.Sequence) - - def __is_type__(o, tp): - if not isinstance(o, collections.Sequence): - if o == tp: - return True - elif len(o.__bases__) == 0: - return False - else: - for bs in o.__bases__: - if __is_type__(bs, tp): - return True - return False - else: - tmp = map(lambda _x: __is_type__(_x, tp), o) - a = tmp[0] - for b in tmp[1:]: - assert a == b - return a - - def __reduce_concat_type__(a, b): - assert __is_type__([a, b], Projection) or __is_type__([a, b], - LayerOutput) - return a - - is_concat_layer = __is_type__( - reduce(__reduce_concat_type__, map(type, input)), LayerOutput) - - layer_type = (LayerType.CONCAT_LAYER - if is_concat_layer else LayerType.CONCAT_PROJ_LAYER) - - if layer_type == LayerType.CONCAT_LAYER: - assert not bias_attr - - layer = Layer( - name=name, - type=layer_type, - inputs=[x.name for x in input] if is_concat_layer else input, - active_type=act.name, - bias=ParamAttr.to_bias(bias_attr), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - sz = layer.config.size - - return LayerOutput( - name, - layer_type=layer_type, - parents=input if is_concat_layer else [x.origin for x in input], - activation=act, - size=sz) - - -@wrap_name_default("seqconcat") -@wrap_act_default(act=IdentityActivation()) -@wrap_bias_attr_default(has_bias=False) -@layer_support(DROPOUT, ERROR_CLIPPING) -def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, - bias_attr=None): - """ - Concatenate sequence a and sequence b. - - Inputs: - - a = [a1, a2, ..., am] - - b = [b1, b2, ..., bn] - - Output: [a1, ..., am, b1, ..., bn] - - Note that the above computation is for one sample. Multiple samples are - processed in one batch. - - The example usage is: - - .. code-block:: python - - concat = seq_concat_layer(a=layer1, b=layer2) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param a: The first input sequence layer - :type a: LayerOutput - :param b: The second input sequence layer - :type b: LayerOutput - :param act: Activation type. IdentityActivation is the default activation. - :type act: BaseActivation - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput) - assert a.size == b.size - Layer( - name=name, - type=LayerType.SEQUENCE_CONCAT_LAYER, - inputs=[a.name, b.name], - active_type=act.name, - bias=ParamAttr.to_bias(bias_attr), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - layer_type=LayerType.SEQUENCE_CONCAT_LAYER, - parents=[a, b], - activation=act, - size=a.size) - - -@wrap_name_default("memory", "memory_name") -def memory(name, - size, - memory_name=None, - is_seq=False, - boot_layer=None, - boot_bias=None, - boot_bias_active_type=None, - boot_with_const_id=None): - """ - The memory takes a layer's output at previous time step as its own output. - - If boot_bias, the activation of the bias is the initial value of the memory. 
- - If boot_with_const_id is set, then the memory's output at the first time step - is an IndexSlot, the Arguments.ids()[0] is this :code:`cost_id`. - - If boot_layer is specified, the memory's output at the first time step will - be the boot_layer's output. - - Otherwise, the memory's output at the first time step is zero by default. - - .. code-block:: python - - mem = memory(size=256, name='state') - state = fc_layer(input=mem, size=256, name='state') - - If you do not want to specify the name, you can also use set_input() - to specify the layer to be remembered as the following: - - .. code-block:: python - - mem = memory(size=256) - state = fc_layer(input=mem, size=256) - mem.set_input(state) - - :param name: The name of the layer which this memory remembers. - If name is None, user should call set_input() to specify the - name of the layer which this memory remembers. - :type name: basestring - :param size: The dimensionality of memory. - :type size: int - :param memory_name: The name of the memory. It is ignored when name is provided. - :type memory_name: basestring - :param is_seq: DEPRECATED. is sequence for boot_layer - :type is_seq: bool - :param boot_layer: This parameter specifies memory's output at the first time - step and the output is boot_layer's output. - :type boot_layer: LayerOutput | None - :param boot_bias: The bias attribute of memory's output at the first time step. - If the parameter is set to False or an object whose type is not - ParameterAttribute, no bias is defined. If the parameter is set - to True, the bias is initialized to zero. - :type boot_bias: ParameterAttribute | None - :param boot_bias_active_type: Activation type for memory's bias at the first time - step. LinearActivation is the default activation. - :type boot_bias_active_type: BaseActivation - :param boot_with_const_id: This parameter specifies memory's output at the first - time step and the output is an index. - :type boot_with_const_id: int - :return: LayerOutput object. - :rtype: LayerOutput - """ - if boot_bias_active_type is None: - boot_bias_active_type = LinearActivation() - - assert boot_bias is None or isinstance(boot_bias, ParameterAttribute) - if isinstance(boot_bias, ParameterAttribute): - boot_bias = ParamAttr.to_bias(boot_bias) - - assert boot_layer is None or isinstance(boot_layer, LayerOutput) - if name is not None: - memory_name = None - - memory_name = Memory( - name, - size, - boot_layer=boot_layer.name if boot_layer is not None else None, - boot_bias=boot_bias, - boot_bias_active_type=boot_bias_active_type.name, - boot_with_const_id=boot_with_const_id, - memory_name=memory_name) - - lout = LayerOutput( - name=memory_name, - size=size, - layer_type=LayerType.MEMORY, - parents=[boot_layer] if boot_layer is not None else None) - return lout - - -@wrap_bias_attr_default() -@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) -@wrap_act_default(param_names=['state_act'], act=TanhActivation()) -@wrap_act_default(act=TanhActivation()) -@wrap_name_default('lstm_step') -@layer_support() -def lstm_step_layer(input, - state, - size=None, - act=None, - name=None, - gate_act=None, - state_act=None, - bias_attr=None, - layer_attr=None): - """ - LSTM Step Layer. This function is used only in recurrent_group. - The lstm equations are shown as follows. - - ..
math:: - - i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i) - - f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f) - - c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c) - - o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o) - - h_t & = o_t tanh(c_t) - - - The input of lstm step is :math:`Wx_t + Wh_{t-1}`, and user should use - :code:`mixed_layer` and :code:`full_matrix_projection` to calculate these - input vectors. - - The state of lstm step is :math:`c_{t-1}`. And lstm step layer will do - - .. math:: - - i_t = \\sigma(input + W_{ci}c_{t-1} + b_i) - - ... - - - This layer has two outputs. The default output is :math:`h_t`. The other - output is :math:`o_t`, whose name is 'state' and users can use - :code:`get_output_layer` to extract this output. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param size: The dimension of this layer's output, which must be - equal to the dimension of the state. - :type size: int - :param input: The input of this layer. - :type input: LayerOutput - :param state: The state of the LSTM unit. - :type state: LayerOutput - :param act: Activation type. TanhActivation is the default activation. - :type act: BaseActivation - :param gate_act: Activation type of the gate. SigmoidActivation is the - default activation. - :type gate_act: BaseActivation - :param state_act: Activation type of the state. TanhActivation is the - default activation. - :type state_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert size is None or state.size == size - size = state.size - Layer( - name=name, - type=LayerType.LSTM_STEP_LAYER, - active_type=act.name, - active_gate_type=gate_act.name, - active_state_type=state_act.name, - bias=ParamAttr.to_bias(bias_attr), - size=state.size, - inputs=[input.name, state.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name=name, - layer_type=LayerType.LSTM_STEP_LAYER, - parents=[input, state], - activation=act, - size=size, - outputs=['default', 'state']) - - -@wrap_bias_attr_default() -@wrap_param_attr_default() -@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) -@wrap_act_default(act=TanhActivation()) -@wrap_name_default('gru_step') -@layer_support() -def gru_step_layer(input, - output_mem, - size=None, - act=None, - name=None, - gate_act=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - - :param input: The input of this layer, whose dimension can be divided by 3. - :type input: LayerOutput - :param output_mem: A memory which memorizes the output of this layer at previous - time step. - :type output_mem: LayerOutput - :param size: The dimension of this layer's output. If it is not set or set to None, - it will be set to one-third of the dimension of the input automatically. - :type size: int - :param act: Activation type of this layer's output. TanhActivation - is the default activation. - :type act: BaseActivation - :param name: The name of this layer. It is optional. 
- :type name: basestring - :param gate_act: Activation type of this layer's two gates. SigmoidActivation is - the default activation. - :type gate_act: BaseActivation - :param bias_attr: The parameter attribute for bias. If this parameter is set to - False or an object whose type is not ParameterAttribute, no bias - is defined. If this parameter is set to True, - the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert input.size % 3 == 0 - if size is None: - size = input.size / 3 - Layer( - name=name, - type=LayerType.GRU_STEP_LAYER, - # The parameter here is for transforming the output_mem. The input has - # already been transformed outside this module so it does not need - # parameter associated with it. - # The parameter here is instead grouped with input is due to - # backward model compatibility. - inputs=[Input(input.name, **param_attr.attr), output_mem.name], - bias=ParamAttr.to_bias(bias_attr), - size=size, - active_type=act.name, - active_gate_type=gate_act.name, - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.GRU_STEP_LAYER, - parents=[input, output_mem], - size=size, - activation=act) - - -@wrap_bias_attr_default() -@wrap_param_attr_default() -@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) -@wrap_act_default(act=TanhActivation()) -@wrap_name_default('gru_step_naive') -@layer_support(ERROR_CLIPPING, DROPOUT) -def gru_step_naive_layer(input, - output_mem, - size=None, - name=None, - act=None, - gate_act=None, - bias_attr=None, - param_attr=None, - layer_attr=None): - """ - GRU Step Layer, which is realized using PaddlePaddle API. It supports ERROR_CLIPPING - and DROPOUT. - - :param input: The input of this layer, whose dimensionality can be divided by 3. - :param output_mem: A memory which memorizes the output of this layer at previous - time step. - :type output_mem: LayerOutput - :param size: The dimension of this layer's output. If it is not set or set to None, - it will be set to one-third of the dimension of the input automatically. - :type size: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param act: Activation type of this layer's output. TanhActivation - is the default activation. - :type act: BaseActivation - :param gate_act: Activation type of this layer's two gates. SigmoidActivation - is the default activation. - :type gate_act: BaseActivation - :param bias_attr: The parameter attribute for bias. If this parameter is set to - False or an object whose type is not ParameterAttribute, no bias - is defined. If this parameter is set to True, - the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. 
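A numpy sketch of one GRU step under the same gating scheme that the naive layer composes out of mixed_layer projections below (gru_step is a hypothetical helper; the real layer learns the projection matrices):

.. code-block:: python

    import numpy as np

    def gru_step(x, h_prev, w_u, w_r, w_c):
        # x is the pre-projected input of dimension 3 * size, split into
        # update-gate, reset-gate, and candidate parts; w_u, w_r, w_c
        # project the previous output h_prev for each part.
        sigmoid = lambda s: 1.0 / (1.0 + np.exp(-s))
        xu, xr, xc = np.split(x, 3)
        u = sigmoid(xu + w_u.dot(h_prev))        # update gate
        r = sigmoid(xr + w_r.dot(h_prev))        # reset gate
        c = np.tanh(xc + w_c.dot(r * h_prev))    # output candidate
        # output = h_prev - u * h_prev + u * c, matching the three
        # projections accumulated into `output` in the body below.
        return (1.0 - u) * h_prev + u * c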
- :rtype: LayerOutput - """ - if input.size % 3 != 0: - raise ValueError("GruStep input size must be divisible by 3") - if size is None: - size = input.size / 3 - - if bias_attr and bias_attr.attr.get("parameter_name", None) is not None: - raise ValueError("You should not specify the field `name` in bias_attr." - " Otherwise, the three biases, which correspond to " - " the two gates and the mixed layer for computing Wx+b" - ", will share the same parameter matrix unexpectedly.") - - def __gate__(gate_name, offset): - with mixed_layer( - name=name + "_" + gate_name, - size=size, - layer_attr=layer_attr, - bias_attr=bias_attr, - act=gate_act) as gate: - gate += identity_projection(input=input, offset=offset) - gate += full_matrix_projection( - input=output_mem, param_attr=param_attr) - return gate - - update_gate = __gate__("update", 0) - reset_gate = __gate__("reset", size) - - with mixed_layer( - name=name + "_reset_output", bias_attr=False) as reset_output: - reset_output += dotmul_operator(a=output_mem, b=reset_gate) - - with mixed_layer( - name=name + "_output_candidate", - size=size, - layer_attr=layer_attr, - bias_attr=bias_attr, - act=act) as output_candidate: - output_candidate += identity_projection(input=input, offset=2 * size) - output_candidate += full_matrix_projection( - input=reset_output, param_attr=param_attr) - - with mixed_layer(name=name) as output: - output += identity_projection(output_mem) - output += dotmul_operator(a=output_mem, b=update_gate, scale=-1.0) - output += dotmul_operator(a=output_candidate, b=update_gate) - - return output - - -@wrap_name_default() -@layer_support() -def get_output_layer(input, arg_name, name=None, layer_attr=None): - """ - Get a layer's output by name. In PaddlePaddle, a layer might have multiple - outputs, but it returns only the default one. If the user wants to use another - output besides the default one, please use get_output_layer first to extract - the output from the input layer. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input layer. And this layer should contain - multiple outputs. - :type input: LayerOutput - :param arg_name: The name of the output to be extracted from the input layer. - :type arg_name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :return: LayerOutput object. - :rtype: LayerOutput - """ - # GetOutputLayer - assert arg_name in input.outputs, 'Cannot get output from a nonexistent input.' \ - ' The requested output name is %s, which is not' \ - ' in %s' % ( - arg_name, ",".join(input.outputs)) - Layer( - name=name, - type=LayerType.GET_OUTPUT_LAYER, - inputs=[Input( - input.name, input_layer_argument=arg_name)], - size=input.size, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name=name, - layer_type=LayerType.GET_OUTPUT_LAYER, - parents=[input], - size=input.size) - - -@wrap_name_default() -@wrap_act_default() -@wrap_bias_attr_default() -@wrap_param_attr_default() -@layer_support() -def recurrent_layer(input, - act=None, - bias_attr=None, - param_attr=None, - name=None, - reverse=False, - layer_attr=None): - """ - Simple recurrent unit layer. It is just a fully connected layer applied - through time. - - For each sequence [start, end] it performs the following computation\: - - .. math:: - - out_{i} = act(in_{i}) \\ \\ \\text{for} \\ i = start \\\\ - out_{i} = act(in_{i} + out_{i-1} * W) \\ \\ \\text{for} \\ start < i <= end - - If reverse is set to True, the order is reversed\: - - ..
math:: - - out_{i} = act(in_{i}) \\ \\ \\text{for} \\ i = end \\\\ - out_{i} = act(in_{i} + out_{i+1} * W) \\ \\ \\text{for} \\ start <= i < end - - - :param input: The input of this layer. - :type input: LayerOutput - :param act: Activation type. TanhActivation is the default activation. - :type act: BaseActivation - :param bias_attr: The parameter attribute for bias. If this parameter is set to - False or an object whose type is not ParameterAttribute, - no bias is defined. If the parameter is set to True, - the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.RECURRENT_LAYER, - inputs=Input(input.name, **param_attr.attr), - active_type=act.name, - bias=ParamAttr.to_bias(bias_attr), - reversed=reverse, - **ExtraAttr.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.RECURRENT_LAYER, - parents=[input], - size=input.size, - activation=act, - reverse=reverse) - - -class StaticInput(object): - """ - StaticInput is only used in recurrent_group which defines a read-only memory - and can be a sequence or non-sequence. - :param size: DEPRECATED - :param is_seq: DEPRECATED - """ - - def __init__(self, input, is_seq=False, size=None): - assert isinstance(input, LayerOutput) - self.input = input - assert input.size is not None - if size is not None: - assert input.size == size - - -def SubsequenceInput(input): - """ - DEPRECATED. - Input sequence has sub-sequence, used in recurrent_group. - - The example usage is: - - .. code-block:: python - - input = SubsequenceInput(layer) - """ - return input - - -@wrap_name_default("recurrent_group") -def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): - """ - Recurrent layer group is an extremely flexible recurrent unit in - PaddlePaddle. As long as the user defines the calculation done within a - time step, PaddlePaddle will iterate such a recurrent calculation over - sequence input. This is useful for attention-based models, or Neural - Turing Machine like models. - - The basic usage (time steps) is: - - .. code-block:: python - - def step(input): - output = fc_layer(input=input, - size=1024, - act=LinearActivation(), - bias_attr=False) - return output - - group = recurrent_group(input=layer, - step=step) - - You can see the following configs for further usage: - - - time steps: lstmemory_group, paddle/legacy/gserver/tests/sequence_layer_group.conf, \ - demo/seqToseq/seqToseq_net.py - - sequence steps: paddle/legacy/gserver/tests/sequence_nest_layer_group.conf - - :param step: A step function which takes the input of recurrent_group as its own - input and returns values as recurrent_group's output every time step. - - The recurrent group scatters a sequence into time steps. And - for each time step, it will invoke the step function and return - a time step result. It then gathers the outputs of each time step into - the layer group's output. - - :type step: callable - - :param name: The recurrent_group's name. It is optional. - :type name: basestring - - :param input: Input links array. - - LayerOutput will be scattered into time steps.
- SubsequenceInput will be scattered into sequence steps. - StaticInput will be imported to each time step, and doesn't change - over time. It's a mechanism to access layer outside step function. - - :type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple - - :param reverse: If reverse is set to True, the recurrent unit will process the - input sequence in a reverse order. - :type reverse: bool - - :param targetInlink: DEPRECATED. - The input layer which share info with layer group's output - - Param input specifies multiple input layers. For - SubsequenceInput inputs, config should assign one input - layer that share info(the number of sentences and the number - of words in each sentence) with all layer group's outputs. - targetInlink should be one of the layer group's input. - - :type targetInlink: LayerOutput | SubsequenceInput - - :return: LayerOutput object. - :rtype: LayerOutput - """ - model_type('recurrent_nn') - - if isinstance(input, LayerOutput) or isinstance(input, StaticInput): - input = [input] - assert isinstance(input, collections.Sequence) - - def is_in_links(x): - return isinstance(x, LayerOutput) - - in_links = filter(is_in_links, input) - - RecurrentLayerGroupWithoutOutLinksBegin( - name=name, - in_links=map(lambda x: x.name, in_links), - seq_reversed=reverse) - in_args = [] - for each_input in input: - if isinstance(each_input, StaticInput): # StaticInput - mem_name = "__%s_memory__" % each_input.input.name - mem = memory( - name=None, - size=each_input.input.size, - boot_layer=each_input.input) - mem.set_input(mem) - in_args.append(mem) - else: - in_args.append(each_input) - - layer_outs = step(*in_args) - - if isinstance(layer_outs, LayerOutput): - layer_outs = [layer_outs] - - for layer_out in layer_outs: - assert isinstance( - layer_out, LayerOutput - ), "Type of step function's return value must be LayerOutput." - layer_out.reverse = reverse - RecurrentLayerGroupSetOutLink(layer_out.name) - - RecurrentLayerGroupEnd(name=name) - - for layer_out in layer_outs: - # The previous full_name is the name inside the recurrent group. - # We need a full_name outside the recurrent group. 
- layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name) - - if len(layer_outs) == 1: - return layer_outs[0] - else: - return layer_outs - - -class BaseGeneratedInput(object): - def __init__(self): - self.bos_id = None - self.eos_id = None - - def before_real_step(self): - raise NotImplementedError() - - def after_real_step(self, *args): - raise NotImplementedError() - - -class GeneratedInput(BaseGeneratedInput): - def after_real_step(self, input): - if isinstance(input, LayerOutput): - input = [input] - elif isinstance(input, collections.Sequence): - input = list(input) - if len(input) > 1: - logger.info( - ("More than one layer inside the recurrent_group " - "is returned as the output of the entire recurrent_group. " - "PLEASE guarantee that the first output is the probability of " - "the predicted next word.")) - - return [maxid_layer( - input=input[0], name='__beam_search_predict__')] + ( - input[1:] if len(input) > 1 else []) - - def before_real_step(self): - predict_id = memory( - name='__beam_search_predict__', - size=self.size, - boot_with_const_id=self.bos_id) - - trg_emb = embedding_layer( - input=predict_id, - size=self.embedding_size, - param_attr=ParamAttr(name=self.embedding_name)) - return trg_emb - - def __init__(self, size, embedding_name, embedding_size): - super(GeneratedInput, self).__init__() - self.size = size - self.embedding_name = embedding_name - self.embedding_size = embedding_size - - -@wrap_name_default() -def maxid_layer(input, name=None, layer_attr=None): - """ - A layer for finding the id which has the maximal value for each sample. - The result is stored in output.ids. - - The example usage is: - - .. code-block:: python - - maxid = maxid_layer(input=layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert isinstance(input, LayerOutput) - l = Layer( - name=name, - type='maxid', - inputs=[input.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.MAXID_LAYER, - parents=[input], - size=l.config.size) - - -@wrap_name_default() -def dot_prod_layer(input1, input2, name=None, layer_attr=None): - """ - A layer for computing the dot product of two vectors. - - The example usage is: - - .. code-block:: python - - dot_prod = dot_prod_layer(input1=vec1, input2=vec2) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input1: The first input layer. - :type input1: LayerOutput - :param input2: The second input layer. - :type input2: LayerOutput - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object.
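For the two product layers documented here, a one-line numpy check of the expected values and shapes (illustration only, not the layer implementations):

.. code-block:: python

    import numpy as np

    # dot_prod_layer reduces two equally sized vectors to one scalar per
    # sample; out_prod_layer produces the full size(input1) x size(input2)
    # matrix instead.
    v1, v2 = np.array([1., 2., 3.]), np.array([4., 5., 6.])
    assert np.dot(v1, v2) == 32.0                 # 1*4 + 2*5 + 3*6
    assert np.outer(v1, v2).shape == (3, 3)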
- :rtype: LayerOutput - """ - assert isinstance(input1, LayerOutput) - assert isinstance(input2, LayerOutput) - assert input1.size == input2.size, ("Two inputs should have the same size.") - - l = Layer( - name=name, - type=LayerType.DOT_PROD_LAYER, - inputs=[input1.name, input2.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.DOT_PROD_LAYER, - parents=[input1, input2], - size=l.config.size) - - -@wrap_name_default() -def out_prod_layer(input1, input2, name=None, layer_attr=None): - """ - A layer for computing the outer product of two vectors. - The result is a matrix of size(input1) x size(input2). - - The example usage is: - - .. code-block:: python - - out_prod = out_prod_layer(input1=vec1, input2=vec2) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input1: The first input layer. - :type input1: LayerOutput - :param input2: The second input layer. - :type input2: LayerOutput - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert isinstance(input1, LayerOutput) - assert isinstance(input2, LayerOutput) - l = Layer( - name=name, - type=LayerType.OUT_PROD_LAYER, - inputs=[input1.name, input2.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.OUT_PROD_LAYER, - parents=[input1, input2], - size=l.config.size) - - -@wrap_name_default() -def eos_layer(input, eos_id, name=None, layer_attr=None): - """ - A layer for checking EOS for each sample: - - output_id = (input_id == conf.eos_id) - - The result is stored in output.ids. - It is used by the recurrent layer group. - - The example usage is: - - .. code-block:: python - - eos = eos_layer(input=layer, eos_id=id) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param eos_id: The end id of the sequence. - :type eos_id: int - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute. - :return: LayerOutput object. - :rtype: LayerOutput - """ - l = Layer( - name=name, - type=LayerType.EOSID_LAYER, - eos_id=eos_id, - inputs=[input.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name=name, - layer_type=LayerType.EOSID_LAYER, - parents=[input], - size=l.config.size) - - -@wrap_name_default() -def beam_search(step, - input, - bos_id, - eos_id, - beam_size, - max_length=500, - name=None, - num_results_per_sample=None): - """ - Beam search is a heuristic search algorithm used in sequence generation. - It explores a graph by expanding the most promising nodes in a limited set - to maintain tractability. - - The example usage is: - - ..
code-block:: python - - def rnn_step(input): - last_time_step_output = memory(name='rnn', size=512) - with mixed_layer(size=512, name='rnn') as simple_rnn: - simple_rnn += full_matrix_projection(input) - simple_rnn += last_time_step_output - return simple_rnn - - generated_word_embedding = GeneratedInput( - size=target_dictionary_dim, - embedding_name="target_language_embedding", - embedding_size=word_vector_dim) - - beam_gen = beam_search(name="decoder", - step=rnn_step, - input=[StaticInput(encoder_last), - generated_word_embedding], - bos_id=0, - eos_id=1, - beam_size=5) - - Please see the following demo for more details: - - - machine translation : demo/seqToseq/translation/gen.conf \ - demo/seqToseq/seqToseq_net.py - - :param name: The name of the recurrent unit that is responsible for - generating sequences. It is optional. - :type name: basestring - :param step: A callable function that defines the calculation in a time - step, and it is applied to sequences with arbitrary length by - sharing a same set of weights. - - You can refer to the first parameter of recurrent_group, or - demo/seqToseq/seqToseq_net.py for more details. - :type step: callable - :param input: Input data for the recurrent unit, which should include the - previously generated words as a GeneratedInput object. - In beam_search, none of the input's type should be LayerOutput. - :type input: list - :param bos_id: Index of the start symbol in the dictionary. The start symbol - is a special token for NLP task, which indicates the - beginning of a sequence. In the generation task, the start - symbol is essential, since it is used to initialize the RNN - internal state. - :type bos_id: int - :param eos_id: Index of the end symbol in the dictionary. The end symbol is - a special token for NLP task, which indicates the end of a - sequence. The generation process will stop once the end - symbol is generated, or a pre-defined max iteration number - is exceeded. - :type eos_id: int - :param max_length: Max generated sequence length. - :type max_length: int - :param beam_size: Beam search for sequence generation is an iterative search - algorithm. To maintain tractability, every iteration only - only stores a predetermined number, called the beam_size, - of the most promising next words. The greater the beam - size, the fewer candidate words are pruned. - :type beam_size: int - :param num_results_per_sample: Number of the generated results per input - sequence. This number must always be less than - beam size. - :type num_results_per_sample: int - :return: The generated word index. - :rtype: LayerOutput - """ - - if num_results_per_sample is None: - num_results_per_sample = beam_size - if num_results_per_sample > beam_size: - logger.warning("num_results_per_sample should be less than beam_size") - - if isinstance(input, StaticInput) or isinstance(input, BaseGeneratedInput): - input = [input] - - generated_input_index = -1 - - real_input = [] - for i, each_input in enumerate(input): - assert not isinstance(each_input, LayerOutput), ( - "in beam_search, " - "none of the input should has a type of LayerOutput.") - if isinstance(each_input, BaseGeneratedInput): - assert generated_input_index == -1, ("recurrent_group accepts " - "only one GeneratedInput.") - generated_input_index = i - - else: - real_input.append(each_input) - - assert generated_input_index != -1, "No GeneratedInput is given." 
- - gipt = input[generated_input_index] - - gipt.bos_id = bos_id - gipt.eos_id = eos_id - - def __real_step__(*args): - eos_name = "__%s_eos_layer__" % name - RecurrentLayerGroupSetGenerator( - Generator( - eos_layer_name=eos_name, - max_num_frames=max_length, - beam_size=beam_size, - num_results_per_sample=num_results_per_sample)) - - args = list(args) - args.insert(generated_input_index, gipt.before_real_step()) - - predict = gipt.after_real_step(step(*args)) - - eos_layer(input=predict[0], eos_id=eos_id, name=eos_name) - return predict - - return recurrent_group( - step=__real_step__, input=real_input, reverse=False, name=name) - - -def __cost_input__(input, label, weight=None): - """ - inputs and parents for cost layers. - """ - if isinstance(input, LayerOutput): - input = [input] - if isinstance(label, LayerOutput): - label = [label] - ipts = [Input(ipt.name) for ipt in (input + label)] - parents = [ipt for ipt in (input + label)] - if weight is not None: - assert weight.size == 1 - ipts.append(Input(weight.name)) - parents.append(weight) - return ipts, parents - - -@wrap_name_default() -@layer_support() -def square_error_cost(input, - label, - weight=None, - name=None, - coeff=1.0, - layer_attr=None): - """ - sum of square error cost: - - .. math:: - - cost = \\sum_{i=1}^N(t_i-y_i)^2 - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - ipts, parents = __cost_input__(input, label, weight) - - Layer( - inputs=ipts, - type="square_error", - name=name, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.COST, parents=parents, size=1) - - -regression_cost = square_error_cost - - -@wrap_name_default("cost") -@layer_support() -def classification_cost(input, - label, - weight=None, - name=None, - evaluator=classification_error_evaluator, - layer_attr=None, - coeff=1.): - """ - classification cost Layer. - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param evaluator: Evaluator method. classification_error_evaluator is the default. - :type evaluator: Evaluator method - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :return: LayerOutput object. 
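For intuition about what beam_size and num_results_per_sample control, here is a self-contained toy beam search; step_scores is a hypothetical stand-in for the per-step next-word log-probabilities produced by the recurrent step function:

.. code-block:: python

    import heapq

    def toy_beam_search(step_scores, bos_id, eos_id, beam_size, max_length):
        beams = [(0.0, [bos_id])]   # (cumulative log-prob, token sequence)
        finished = []
        for _ in range(max_length):
            candidates = []
            for logp, seq in beams:
                for tok, tok_logp in step_scores(seq).items():
                    candidates.append((logp + tok_logp, seq + [tok]))
            # each iteration keeps only the beam_size most promising expansions
            beams = heapq.nlargest(beam_size, candidates, key=lambda c: c[0])
            finished += [b for b in beams if b[1][-1] == eos_id]
            beams = [b for b in beams if b[1][-1] != eos_id]
            if not beams:
                break
        return heapq.nlargest(beam_size, finished or beams, key=lambda c: c[0])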
- :rtype: LayerOutput - """ - assert input.layer_type != LayerType.DATA - assert isinstance(input.activation, SoftmaxActivation) - assert label.layer_type == LayerType.DATA - - ipts, parents = __cost_input__(input, label, weight) - - Layer( - name=name, - type="multi-class-cross-entropy", - inputs=ipts, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - def __add_evaluator__(e): - assert callable(e) - assert hasattr(e, 'is_evaluator') - assert isinstance(e.is_evaluator, bool) - assert e.is_evaluator - assert hasattr(e, "for_classification") - assert isinstance(e.for_classification, bool) - assert e.for_classification - - e(name=e.__name__, input=input, label=label, weight=weight) - - if not isinstance(evaluator, collections.Sequence): - evaluator = [evaluator] - - for each_evaluator in evaluator: - __add_evaluator__(each_evaluator) - - return LayerOutput(name, LayerType.COST, parents=parents, size=1) - - -def conv_operator(img, - filter, - filter_size, - num_filters, - num_channels=None, - stride=1, - padding=0, - filter_size_y=None, - stride_y=None, - padding_y=None, - trans=False): - """ - Different from img_conv_layer, conv_op is an Operator, which can be used - in mixed_layer. And conv_op takes two inputs to perform convolution. - The first input is the image and the second is filter kernel. It only - supports GPU mode. - - The example usage is: - - .. code-block:: python - - op = conv_operator(img=input1, - filter=input2, - filter_size=3, - num_filters=64, - num_channels=64) - - :param img: The input image. - :type img: LayerOutput - :param filter: The input filter. - :type filter: LayerOutput - :param filter_size: The dimension of the filter kernel on the x axis. - :type filter_size: int - :param filter_size_y: The dimension of the filter kernel on the y axis. - If the parameter is not set or set to None, it will - set to 'filter_size' automatically. - :type filter_size_y: int - :param num_filters: The number of the output channels. - :type num_filters: int - :param num_channels: The number of the input channels. If the parameter is not set - or set to None, it will be automatically set to the channel - number of the 'img'. - :type num_channels: int - :param stride: The stride on the x axis. - :type stride: int - :param stride_y: The stride on the y axis. If the parameter is not set or - set to None, it will be set to 'stride' automatically. - :type stride_y: int - :param padding: The padding size on the x axis. - :type padding: int - :param padding_y: The padding size on the y axis. If the parameter is not set - or set to None, it will be set to 'padding' automatically. - :type padding_y: int - :return: A ConvOperator Object. 
- :rtype: ConvOperator - """ - if filter_size_y is None: - filter_size_y = filter_size - if stride_y is None: - stride_y = stride - if padding_y is None: - padding_y = padding - - if num_channels is None: - num_channels = img.num_filters - - assert isinstance(filter, LayerOutput) - assert filter.size is not None - - opCls = ConvTransOperator if trans else ConvOperator - - op = opCls( - input_layer_names=[img.name, filter.name], - num_filters=num_filters, - conv_conf=Conv( - filter_size=filter_size, - padding=padding, - stride=stride, - channels=num_channels, - filter_size_y=filter_size_y, - padding_y=padding_y, - stride_y=stride_y, - groups=1)) - - op.origin = [img, filter] - return op - - -@wrap_param_attr_default() -def conv_projection(input, - filter_size, - num_filters, - num_channels=None, - stride=1, - padding=0, - filter_size_y=None, - stride_y=None, - padding_y=None, - groups=1, - param_attr=None, - trans=False): - """ - Different from img_conv_layer and conv_op, conv_projection is a Projection, - which can be used in mixed_layer and concat_layer. It uses cudnn to implement - convolution and only supports GPU mode. - - The example usage is: - - .. code-block:: python - - proj = conv_projection(input=input1, - filter_size=3, - num_filters=64, - num_channels=64) - - :param input: The input of this layer. - :type input: LayerOutput - :param filter_size: The dimensions of the filter kernel. If the parameter is - set to one integer, the two dimensions on x and y axises - will be same when filter_size_y is not set. If it is set - to a list, the first element indicates the dimension on - the x axis, and the second is used to specify the dimension - on the y axis when filter_size_y is not provided. - :type filter_size: int | tuple | list - :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter - is not set, it will be set automatically according to filter_size. - :type filter_size_y: int - :param num_filters: The number of filters. - :type num_filters: int - :param num_channels: The number of the input channels. - :type num_channels: int - :param stride: The strides. If the parameter is set to one integer, the strides - on x and y axises will be same when stride_y is not set. If it is - set to a list, the first element indicates the stride on the x axis, - and the second is used to specify the stride on the y axis when - stride_y is not provided. - :type stride: int | tuple | list - :param stride_y: The stride on the y axis. - :type stride_y: int - :param padding: The padding sizes. If the parameter is set to one integer, the padding - sizes on x and y axises will be same when padding_y is not set. If it - is set to a list, the first element indicates the padding size on the - x axis, and the second is used to specify the padding size on the y axis - when padding_y is not provided. - :type padding: int | tuple | list - :param padding_y: The padding size on the y axis. - :type padding_y: int - :param groups: The group number. - :type groups: int - :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param trans: Whether it is ConvTransProjection or ConvProjection - :type trans: bool - :return: A Projection Object. 
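Both conv_operator and conv_projection determine their output spatial size from the kernel, padding, and stride; a quick check under standard convolution arithmetic (this formula is an assumption for illustration, not restated in the code above):

.. code-block:: python

    def conv_output_size(img_size, filter_size, padding, stride):
        # standard convolution arithmetic:
        # output = (input + 2 * padding - filter) // stride + 1
        return (img_size + 2 * padding - filter_size) // stride + 1

    assert conv_output_size(32, 3, 1, 1) == 32  # 3x3 kernel, padding=1 keeps the size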
- :rtype: ConvTransProjection | ConvProjection - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if filter_size_y is None: - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 2 - filter_size, filter_size_y = filter_size - else: - filter_size_y = filter_size - - if stride_y is None: - if isinstance(stride, collections.Sequence): - assert len(stride) == 2 - stride, stride_y = stride - else: - stride_y = stride - - if padding_y is None: - if isinstance(padding, collections.Sequence): - assert len(padding) == 2 - padding, padding_y = padding - else: - padding_y = padding - - if param_attr.attr.get('initial_smart'): - # special initial for conv layers. - init_w = (2.0 / (filter_size**2 * num_channels))**0.5 - param_attr.attr["initial_mean"] = 0.0 - param_attr.attr["initial_std"] = init_w - param_attr.attr["initial_strategy"] = 0 - param_attr.attr["initial_smart"] = False - - projCls = ConvTransProjection if trans else ConvProjection - - proj = projCls( - input_layer_name=input.name, - num_filters=num_filters, - conv_conf=Conv( - filter_size=filter_size, - padding=padding, - stride=stride, - channels=num_channels, - filter_size_y=filter_size_y, - padding_y=padding_y, - stride_y=stride_y, - groups=groups), - **param_attr.attr) - - proj.origin = input - return proj - - -@wrap_name_default("pad") -@layer_support() -def pad_layer(input, - pad_c=None, - pad_h=None, - pad_w=None, - name=None, - layer_attr=None): - """ - This operation pads zeros to the input data according to pad_c,pad_h - and pad_w. pad_c, pad_h, pad_w specify the size in the corresponding - dimension. And the input data shape is NCHW. - - For example, pad_c=[2,3] means padding 2 zeros before the input data - and 3 zeros after the input data in the channel dimension. pad_h means - padding zeros in the height dimension. pad_w means padding zeros in the - width dimension. - - For example, - - .. code-block:: python - - input(2,2,2,3) = [ - [ [[1,2,3], [3,4,5]], - [[2,3,5], [1,6,7]] ], - [ [[4,3,1], [1,8,7]], - [[3,8,9], [2,3,5]] ] - ] - - pad_c=[1,1], pad_h=[0,0], pad_w=[0,0] - - output(2,4,2,3) = [ - [ [[0,0,0], [0,0,0]], - [[1,2,3], [3,4,5]], - [[2,3,5], [1,6,7]], - [[0,0,0], [0,0,0]] ], - [ [[0,0,0], [0,0,0]], - [[4,3,1], [1,8,7]], - [[3,8,9], [2,3,5]], - [[0,0,0], [0,0,0]] ] - ] - - The simply usage is: - - .. code-block:: python - - pad = pad_layer(input=ipt, - pad_c=[4,4], - pad_h=[0,0], - pad_w=[2,2]) - - :param input: The input of this layer. - :type input: LayerOutput - :param pad_c: The padding size in the channel dimension. - :type pad_c: list | None - :param pad_h: The padding size in the height dimension. - :type pad_h: list | None - :param pad_w: The padding size in the width dimension. - :type pad_w: list | None - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param name: The name of this layer. It is optional. - :type name: basestring - :return: LayerOutput object. 
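The 'smart' initialization special-cased above sets the weight standard deviation from the filter fan-in; numerically:

.. code-block:: python

    def conv_init_std(filter_size, num_channels):
        # mirrors the special case in conv_projection: the fan-in of one
        # output unit is filter_size ** 2 * num_channels
        return (2.0 / (filter_size ** 2 * num_channels)) ** 0.5

    print(conv_init_std(3, 64))  # ~0.0589 for a 3x3 kernel over 64 channels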
- :rtype: LayerOutput - """ - if pad_c is not None: - assert isinstance(pad_c, collections.Sequence) and len(pad_c) == 2 - else: - pad_c = [0, 0] - - if pad_h is not None: - assert isinstance(pad_h, collections.Sequence) and len(pad_h) == 2 - else: - pad_h = [0, 0] - - if pad_w is not None: - assert isinstance(pad_w, collections.Sequence) and len(pad_w) == 2 - else: - pad_w = [0, 0] - - assert input.num_filters is not None - in_ch = input.num_filters - out_ch = in_ch + pad_c[0] + pad_c[1] - - l = Layer( - name=name, - type=LayerType.PAD_LAYER, - inputs=Input( - input.name, - pad=Pad( - channels=in_ch, - pad_c=pad_c, - pad_h=pad_h, - pad_w=pad_w, )), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - layer_type=LayerType.PAD_LAYER, - parents=[input], - num_filters=out_ch, - size=l.config.size) - - -@wrap_name_default() -@layer_support() -def conv_shift_layer(a, b, name=None, layer_attr=None): - """ - This layer performs cyclic convolution on two inputs. For example: - - a[in]: contains M elements. - - b[in]: contains N elements (N should be odd). - - c[out]: contains M elements. - - .. math:: - - c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j} - - In this formula: - - a's index is computed modulo M. When it is negative, then get item from - the right side (which is the end of array) to the left. - - b's index is computed modulo N. When it is negative, then get item from - the right size (which is the end of array) to the left. - - The example usage is: - - .. code-block:: python - - conv_shift = conv_shift_layer(a=layer1, b=layer2) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param a: The first input of this layer. - :type a: LayerOutput - :param b: The second input of this layer. - :type b: LayerOutput - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput) - assert b.size is None or b.size % 2 == 1 # size of b must be odd. - Layer( - name=name, - type=LayerType.CONV_SHIFT_LAYER, - inputs=[a.name, b.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, LayerType.CONV_SHIFT_LAYER, parents=[a, b], size=a.size) - - -@wrap_name_default() -@wrap_param_attr_default() -@wrap_bias_attr_default() -@wrap_act_default(act=LinearActivation()) -@layer_support(ERROR_CLIPPING, DROPOUT) -def tensor_layer(a, - b, - size, - act=None, - name=None, - param_attr=None, - bias_attr=None, - layer_attr=None): - """ - This layer performs tensor operation on two inputs. - For example: - - .. math:: - y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1 - - In this formular: - - :math:`a`: the first input contains M elements. - - :math:`b`: the second input contains N elements. - - :math:`y_{i}`: the i-th element of y. - - :math:`W_{i}`: the i-th learned weight, shape if [M, N] - - :math:`b^\mathrm{T}`: the transpose of :math:`b_{2}`. - - The simple usage is: - - .. code-block:: python - - tensor = tensor_layer(a=layer1, b=layer2, size=1000) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param a: The first input of this layer. - :type a: LayerOutput - :param b: The second input of this layer. - :type b: LayerOutput - :param size: The dimension of this layer. - :type size: int - :param act: Activation type. LinearActivation is the default activation. 
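A NumPy rendering of the cyclic convolution formula above, with both indices wrapped as described (illustrative only, one sample):

.. code-block:: python

    import numpy as np

    def conv_shift(a, b):
        # c[i] = sum_{j=-(N-1)/2}^{(N-1)/2} a[(i + j) % M] * b[j % N]
        M, N = len(a), len(b)
        assert N % 2 == 1  # size of b must be odd
        half = (N - 1) // 2
        c = np.zeros(M)
        for i in range(M):
            for j in range(-half, half + 1):
                c[i] += a[(i + j) % M] * b[j % N]
        return c

    print(conv_shift(np.arange(5.0), np.array([1.0, 2.0, 3.0])))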
- :type act: BaseActivation - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param bias_attr: The parameter attribute for bias. If this parameter is set to - False or an object whose type is not ParameterAttribute, - no bias is defined. If this parameter is set to True, - the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput) - Layer( - name=name, - size=size, - type=LayerType.TENSOR_LAYER, - active_type=act.name, - bias=ParamAttr.to_bias(bias_attr), - inputs=[Input(a.name, **param_attr.attr), Input(b.name)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.TENSOR_LAYER, parents=[a, b], activation=act, size=size) - - -@wrap_name_default() -@wrap_param_attr_default() -@wrap_bias_attr_default() -@wrap_act_default() -@layer_support(DROPOUT, ERROR_CLIPPING) -def selective_fc_layer(input, - size, - select=None, - act=None, - name=None, - pass_generation=False, - has_selected_colums=True, - mul_ratio=0.02, - param_attr=None, - bias_attr=None, - layer_attr=None): - """ - Selectived fully connected layer. Different from fc_layer, the output - of this layer can be sparse. It requires an additional input to indicate - several selected columns for output. If the selected columns is not - specified, selective_fc_layer acts exactly like fc_layer. - - The simple usage is: - - .. code-block:: python - - sel_fc = selective_fc_layer(input=input, size=128, act=TanhActivation()) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput | list | tuple - :param select: The layer to select columns to output. It should be a sparse - binary matrix, and is treated as the mask of selective fc. If - it is not set or set to None, selective_fc_layer acts exactly - like fc_layer. - :type select: LayerOutput - :param size: The dimension of this layer, which should be equal to that of - the layer 'select'. - :type size: int - :param act: Activation type. TanhActivation is the default activation. - :type act: BaseActivation - :param pass_generation: The flag which indicates whether it is during generation. - :type pass_generation: bool - :param has_selected_colums: The flag which indicates whether the parameter 'select' - has been set. True is the default. - :type has_selected_colums: bool - :param mul_ratio: A ratio helps to judge how sparse the output is and determine - the computation method for speed consideration. - :type mul_ratio: float - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param bias_attr: The parameter attribute for bias. If this parameter is set to - False or an object whose type is not ParameterAttribute, - no bias is defined. If this parameter is set to True, - the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute | None - :return: LayerOutput object. 
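The tensor operation above is a batch of bilinear forms; for a single sample it reduces to the following sketch (einsum used for brevity, shapes are illustrative):

.. code-block:: python

    import numpy as np

    def tensor_forward(a, b, W):
        # y[i] = a @ W[i] @ b.T for each of the K learned [M, N] slices
        return np.einsum('m,kmn,n->k', a, W, b)

    a, b = np.ones(4), np.ones(3)
    W = np.random.rand(5, 4, 3)           # K=5 slices of shape [M=4, N=3]
    print(tensor_forward(a, b, W).shape)  # (5,)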
- :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - assert not isinstance(param_attr, collections.Sequence) - param_attr = [param_attr] - else: - if isinstance(param_attr, collections.Sequence): - assert len(input) == len(param_attr) - else: - if "parameter_name" in param_attr.attr and len(input) > 1: - logger.fatal( - "When the name field of param_attr is manually specified " - "and the input is a list, the param_attr should also be a " - "list with each item being the param_attr for each input " - "item. If only one named param_attr is provided, all the " - "input items would share this parameter.") - param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))] - - assert isinstance(input, collections.Sequence) - assert isinstance(select, LayerOutput) - if select.size is not None: - assert select.size == size - Layer( - inputs=[ - Input(ipt.name, **attr.attr) for ipt, attr in zip(input, param_attr) - ] + [select.name], - name=name, - type=LayerType.SEL_FC_LAYER, - size=size, - bias=ParameterAttribute.to_bias(bias_attr), - active_type=act.name, - selective_fc_pass_generation=pass_generation, - has_selected_colums=has_selected_colums, - selective_fc_full_mul_ratio=mul_ratio, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.SEL_FC_LAYER, - list(input) + [select], - activation=act, - size=size) - - -@wrap_name_default() -@layer_support() -def sampling_id_layer(input, name=None, layer_attr=None): - """ - A layer for sampling id from a multinomial distribution from the input layer. - Sampling one id for one sample. - - The simple usage is: - - .. code-block:: python - - samping_id = sampling_id_layer(input=input) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - l = Layer( - name=name, - type=LayerType.SAMPLING_ID_LAYER, - inputs=[Input(input.name)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.SAMPLING_ID_LAYER, input, size=l.config.size) - - -@wrap_name_default() -@layer_support() -def slope_intercept_layer(input, - name=None, - slope=1.0, - intercept=0.0, - layer_attr=None): - """ - This layer for applying a slope and an intercept to the input. - - .. math:: - y = slope * x + intercept - - The simple usage is: - - .. code-block:: python - - scale = slope_intercept_layer(input=input, slope=-1.0, intercept=1.0) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param slope: The scale factor. - :type slope: float - :param intercept: The offset. - :type intercept: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. 
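sampling_id_layer draws one id per sample from the input's multinomial distribution; a NumPy analogue (illustrative only):

.. code-block:: python

    import numpy as np

    def sampling_id(probs, rng=np.random.default_rng()):
        # one id per sample, drawn from each row's distribution
        return np.array([rng.choice(len(p), p=p) for p in probs])

    probs = np.array([[0.1, 0.7, 0.2],
                      [0.5, 0.25, 0.25]])
    print(sampling_id(probs))  # e.g. [1 0]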
- :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.SLOPE_INTERCEPT_LAYER, - slope=slope, - intercept=intercept, - inputs=[Input(input.name)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.SLOPE_INTERCEPT_LAYER, input, size=input.size) - - -@wrap_name_default() -@layer_support() -def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None): - """ - A layer that computes a weighted sum of vectors. It takes two inputs. - - Input: size of weights is M - size of vectors is M*N - - Output: a vector of size=N - - .. math:: - - z(i) = \sum_{j=0}^{M-1} x(j) y(i+Nj) - - where :math:`0 \le i \le N-1` - - Or in the matrix notation: - - .. math:: - - z = x^\mathrm{T} Y - - In this formula: - - :math:`x`: weights - - :math:`y`: vectors. - - :math:`z`: the output. - - Note that the above computation is for one sample. Multiple samples are - processed in one batch. - - The simple usage is: - - .. code-block:: python - - linear_comb = linear_comb_layer(weights=weight, vectors=vectors, - size=elem_dim) - - :param weights: The weight layer. - :type weights: LayerOutput - :param vectors: The vector layer. - :type vectors: LayerOutput - :param size: The dimension of this layer. - :type size: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(weights, LayerOutput) and isinstance(vectors, LayerOutput) - if vectors.size is not None and weights.size is not None: - assert vectors.size % weights.size == 0 - if size is None: - size = vectors.size / weights.size - else: - assert size == vectors.size / weights.size - Layer( - name=name, - type=LayerType.LINEAR_COMBINATION_LAYER, - size=size, - inputs=[Input(weights.name), Input(vectors.name)], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.LINEAR_COMBINATION_LAYER, [weights, vectors], size=size) - - -convex_comb_layer = linear_comb_layer - - -@wrap_name_default() -@layer_support() -def block_expand_layer(input, - block_x=0, - block_y=0, - stride_x=0, - stride_y=0, - padding_x=0, - padding_y=0, - num_channels=None, - name=None, - layer_attr=None): - """ - Expand feature map to minibatch matrix. - - matrix width is: block_y * block_x * num_channels - - matrix height is: outputH * outputW - - .. math:: - - outputH = 1 + (2 * padding_y + imgSizeH - block_y + stride_y - 1) / stride_y - - outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x - - The expanding method is the same as that of ExpandConvLayer, except that the - transposed value is saved. After expanding, output.sequenceStartPositions - will store the timeline. The number of time steps is outputH * outputW and - the dimension of each time step is block_y * block_x * num_channels. This - layer can be used after a convolutional neural network and before a - recurrent neural network. - - The simple usage is: - - .. code-block:: python - - block_expand = block_expand_layer(input=layer, - num_channels=128, - stride_x=1, - stride_y=1, - block_x=1, - block_y=3) - - :param input: The input of this layer. - :type input: LayerOutput - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channel number of the input. - :type num_channels: int - :param block_x: The width of sub block.
- :type block_x: int - :param block_y: The width of sub block. - :type block_y: int - :param stride_x: The stride size in horizontal direction. - :type stride_x: int - :param stride_y: The stride size in vertical direction. - :type stride_y: int - :param padding_x: The padding size in horizontal direction. - :type padding_x: int - :param padding_y: The padding size in vertical direction. - :type padding_y: int - :param name: The name of this layer. It is optional. - :type name: basestring. - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - l = Layer( - name=name, - inputs=Input( - input.name, - block_expand=BlockExpand( - channels=num_channels, - block_x=block_x, - block_y=block_y, - stride_x=stride_x, - stride_y=stride_y, - padding_x=padding_x, - padding_y=padding_y)), - type=LayerType.BLOCK_EXPAND, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, LayerType.BLOCK_EXPAND, parents=[input], size=l.config.size) - - -@wrap_name_default() -@layer_support() -def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): - """ - A layer to do max out on convolutional layer output. - - Input: the output of a convolutional layer. - - Output: feature map size same as the input's, and its channel number is - (input channel) / groups. - - So groups should be larger than 1, and the num of channels should be able - to be devided by groups. - - Reference: - `Maxout Networks - `_ - `Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks - `_ - - - .. math:: - - & out = \max_k (in[n, k, o_c , s]) - - & out_{i * s + j} = \max_k in_{ k * o_{c} * s + i * s + j} - - & s = \\frac{input.size}{ num\_channels} - - & o_{c} = \\frac{num\_channels}{groups} - - & 0 \le i < o_{c} - - & 0 \le j < s - - & 0 \le k < groups - - - The simple usage is: - - .. code-block:: python - - maxout = maxout_layer(input, - num_channels=128, - groups=4) - - :param input: The input of this layer. - :type input: LayerOutput - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :type num_channels: int - :param groups: The group number of input layer. - :type groups: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input.activation, LinearActivation) - assert groups > 1 - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - assert num_channels % groups == 0 - l = Layer( - name=name, - inputs=Input( - input.name, maxout=MaxOut( - channels=num_channels, groups=groups)), - type=LayerType.MAXOUT, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.MAXOUT, parents=[input], size=l.config.size) - - -@wrap_name_default() -@layer_support() -def ctc_layer(input, - label, - size=None, - name=None, - norm_by_times=False, - layer_attr=None): - """ - Connectionist Temporal Classification (CTC) is designed for temporal - classication task. e.g. 
sequence labeling problems where the - alignment between the inputs and the target labels is unknown. - - Reference: - `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data - with Recurrent Neural Networks - `_ - - Note: - Considering the 'blank' label needed by CTC, you need to use (num_classes + 1) - as the size of the input, where num_classes is the category number. - And the 'blank' is the last category index. So the size of the 'input' layer (e.g. - fc_layer with softmax activation) should be (num_classes + 1). The size of - ctc_layer should also be (num_classes + 1). - - The example usage is: - - .. code-block:: python - - ctc = ctc_layer(input=input, - label=label, - size=9055, - norm_by_times=True) - - :param input: The input of this layer. - :type input: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param size: The dimension of this layer, which must be equal to (category number + 1). - :type size: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param norm_by_times: Whether to do normalization by times. False is the default. - :type norm_by_times: bool - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - assert isinstance(label, LayerOutput) - if label.size is not None: - if size is not None: - assert size == label.size + 1 - else: - size = label.size + 1 - Layer( - name=name, - type=LayerType.CTC_LAYER, - size=size, - norm_by_times=norm_by_times, - inputs=[input.name, label.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.CTC_LAYER, [input, label], size=size) - - -@wrap_name_default() -@layer_support() -def warp_ctc_layer(input, - label, - size=None, - name=None, - blank=0, - norm_by_times=False, - layer_attr=None): - """ - A layer integrating the open-source `warp-ctc - `_ library, which is used in - `Deep Speech 2: End-to-End Speech Recognition in English and Mandarin - `_, to compute Connectionist Temporal - Classification (CTC) loss. Besides, another `warp-ctc repository - `_, forked from - the official one, is maintained to enable more compilation options. During the - building process, PaddlePaddle will clone the source code, build it, and - install it to the :code:`third_party/install/warpctc` directory. - - Reference: - `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data - with Recurrent Neural Networks - `_ - - Note: - - Let num_classes represent the category number. Considering the 'blank' - label needed by CTC, you need to use (num_classes + 1) as the size of - warp_ctc layer. - - You can set 'blank' to any value in the range [0, num_classes], which - should be consistent with those used in your labels. - - Since a native 'softmax' activation is integrated into the warp-ctc - library, a 'linear' activation is expected in the 'input' layer instead. - - The example usage is: - - .. code-block:: python - - ctc = warp_ctc_layer(input=input, - label=label, - size=1001, - blank=1000, - norm_by_times=False) - - :param input: The input of this layer. - :type input: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param size: The dimension of this layer, which must be equal to (category number + 1). - :type size: int - :param name: The name of this layer. It is optional.
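Spelling out the (num_classes + 1) sizing convention from the notes above, in the style of the docstring examples ('features' and 'label' are placeholder layers in this sketch):

.. code-block:: python

    num_classes = 9054  # category number; 'blank' is the last index, 9054
    net = fc_layer(input=features, size=num_classes + 1,
                   act=SoftmaxActivation())
    ctc = ctc_layer(input=net, label=label, size=num_classes + 1,
                    norm_by_times=True)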
- :type name: basestring - :param blank: The 'blank' label used in ctc. - :type blank: int - :param norm_by_times: Whether to do normalization by times. False is the default. - :type norm_by_times: bool - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - assert isinstance(label, LayerOutput) - if label.size is not None: - if size is not None: - assert size == label.size + 1 - else: - size = label.size + 1 - Layer( - name=name, - type=LayerType.WARP_CTC_LAYER, - size=size, - blank=blank, - norm_by_times=norm_by_times, - inputs=[input.name, label.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.WARP_CTC_LAYER, parents=[input, label], size=size) - - -@wrap_name_default() -@wrap_param_attr_default() -@layer_support() -def crf_layer(input, - label, - size=None, - weight=None, - param_attr=None, - name=None, - coeff=1.0, - layer_attr=None): - """ - A layer for calculating the cost of sequential conditional random - field model. - - The example usage is: - - .. code-block:: python - - crf = crf_layer(input=input, - label=label, - size=label_dim) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param size: The category number. - :type size: int - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - assert isinstance(label, LayerOutput) - assert weight is None or isinstance(weight, LayerOutput) - if input.size is not None and label.size is not None: - assert input.size == label.size - if size is None: - size = input.size - else: - assert size == input.size - - ipts = [Input(input.name, **param_attr.attr), Input(label.name)] - if weight is not None: - ipts.append(Input(weight.name)) - - Layer( - name=name, - type=LayerType.CRF_LAYER, - size=size, - inputs=ipts, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - parents = [input, label] - if weight is not None: - parents.append(weight) - # The size for LayerOutput means the dimension of the output. - # It's different from the meaning of crf layer, which is the number of - # classes. - return LayerOutput(name, LayerType.CRF_LAYER, parents, size=1) - - -@wrap_name_default() -@wrap_param_attr_default() -@layer_support() -def crf_decoding_layer(input, - size, - label=None, - param_attr=None, - name=None, - layer_attr=None): - """ - A layer for calculating the decoding sequence of sequential conditional - random field model. The decoding sequence is stored in output.ids. - If the input 'label' is provided, it is treated as the ground-truth label, and - this layer will also calculate error. output.value[i] is 1 for an incorrect - decoding and 0 for the correct. - - The example usage is: - - .. 
code-block:: python - - crf_decoding = crf_decoding_layer(input=input, - size=label_dim) - - :param input: The first input layer. - :type input: LayerOutput - :param size: The dimension of this layer. - :type size: int - :param label: The input label. - :type label: LayerOutput | None - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert isinstance(input, LayerOutput) - assert label is None or isinstance(label, LayerOutput) - - ipts = [Input(input.name, **param_attr.attr)] - if label is not None: - ipts.append(Input(label.name)) - - Layer( - name=name, - type=LayerType.CRF_DECODING_LAYER, - size=size, - inputs=ipts, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - parents = [input] - if label is not None: - parents.append(label) - # The size for LayerOutput means the dimension of the output. - # It's different from the meaning of crf layer, which is the number of - # classes. - return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1) - - -""" -Following are cost Layers. -""" - - -@wrap_bias_attr_default(has_bias=True) -@wrap_param_attr_default() -@wrap_name_default() -@layer_support() -def nce_layer(input, - label, - num_classes=None, - param_attr=None, - weight=None, - num_neg_samples=10, - neg_distribution=None, - name=None, - bias_attr=None, - layer_attr=None): - """ - Noise-contrastive estimation. - - Reference: - `A fast and simple algorithm for training neural probabilistic language - models. `_ - - The example usage is: - - .. code-block:: python - - cost = nce_layer(input=[layer1, layer2], label=layer2, - param_attr=[attr1, attr2], weight=layer3, - num_classes=3, neg_distribution=[0.1,0.3,0.6]) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The first input of this layer. - :type input: LayerOutput | list | tuple | collections.Sequence - :param label: The input label. - :type label: LayerOutput - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param num_classes: The number of classes. - :type num_classes: int - :param act: Activation type. SigmoidActivation is the default activation. - :type act: BaseActivation - :param param_attr: The parameter attribute. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param num_neg_samples: The number of sampled negative labels. 10 is the - default value. - :type num_neg_samples: int - :param neg_distribution: The discrete noisy distribution over the output - space from which num_neg_samples negative labels - are sampled. If this parameter is not set, a - uniform distribution will be used. A user-defined - distribution is a list whose length must be equal - to the num_classes. Each member of the list defines - the probability of a class given input x. - :type neg_distribution: list | tuple | collections.Sequence | None - :param bias_attr: The parameter attribute for bias. If this parameter is set to - False or an object whose type is not ParameterAttribute, - no bias is defined. If this parameter is set to True, - the bias is initialized to zero. 
- :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - if isinstance(input, LayerOutput): - input = [input] - assert not isinstance(param_attr, collections.Sequence) - param_attr = [param_attr] - else: - if isinstance(param_attr, collections.Sequence): - assert len(input) == len(param_attr) - else: - param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))] - - assert isinstance(input, collections.Sequence) - - assert isinstance(label, LayerOutput) - assert label.layer_type == LayerType.DATA - if num_classes is None: - num_classes = label.size - if neg_distribution is not None: - assert isinstance(neg_distribution, collections.Sequence) - assert len(neg_distribution) == num_classes - assert abs(sum(neg_distribution) - 1.0) < 1e-5 - - ipts_for_layer = [] - parents = [] - for each_input, attr in zip(input, param_attr): - assert isinstance(each_input, LayerOutput) - ipts_for_layer.append(Input(each_input.name, **attr.attr)) - parents.append(each_input) - ipts_for_layer.append(label.name) - parents.append(label) - - if weight is not None: - assert isinstance(weight, LayerOutput) - assert weight.layer_type == LayerType.DATA - ipts_for_layer.append(weight.name) - parents.append(weight) - - l = Layer( - name=name, - type=LayerType.NCE_LAYER, - num_classes=num_classes, - neg_sampling_dist=neg_distribution, - active_type=SigmoidActivation().name, - num_neg_samples=num_neg_samples, - inputs=ipts_for_layer, - bias=ParamAttr.to_bias(bias_attr), - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.NCE_LAYER, - parents=parents, - size=l.config.size, - activation=SigmoidActivation()) - - -@wrap_name_default() -@layer_support() -def rank_cost(left, - right, - label, - weight=None, - name=None, - coeff=1.0, - layer_attr=None): - """ - A cost Layer for learning to rank using gradient descent. - - Reference: - `Learning to Rank using Gradient Descent - `_ - - .. math:: - - C_{i,j} & = -\\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) - - o_{i,j} & = o_i - o_j - - \\tilde{P_{i,j}} & = \\{0, 0.5, 1\\} \ or \ \\{0, 1\\} - - In this formula: - - :math:`C_{i,j}` is the cross entropy cost. - - :math:`\\tilde{P_{i,j}}` is the label. 1 means positive order - and 0 means reverse order. - - :math:`o_i` and :math:`o_j`: the left output and right output. - Their dimension is one. - - The example usage is: - - .. code-block:: python - - cost = rank_cost(left=out_left, - right=out_right, - label=label) - - :param left: The first input, the size of this layer is 1. - :type left: LayerOutput - :param right: The right input, the size of this layer is 1. - :type right: LayerOutput - :param label: Label is 1 or 0, means positive order and reverse order. - :type label: LayerOutput - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. 
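The neg_distribution checks above admit a simple sampling interpretation; a NumPy sketch of drawing negative labels from the user-defined noise distribution (illustrative only):

.. code-block:: python

    import numpy as np

    def sample_negatives(neg_distribution, num_neg_samples,
                         rng=np.random.default_rng()):
        neg = np.asarray(neg_distribution)
        assert abs(neg.sum() - 1.0) < 1e-5  # same check as nce_layer
        return rng.choice(len(neg), size=num_neg_samples, p=neg)

    print(sample_negatives([0.1, 0.3, 0.6], num_neg_samples=10))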
- :rtype: LayerOutput - """ - assert left.size == 1 - assert right.size == 1 - assert label.size == 1 - - ipts = [left.name, right.name, label.name] - parents = [left, right, label] - if weight is not None: - ipts.append(weight.name) - parents.append(weight) - - Layer( - name=name, - type=LayerType.RANK_COST, - inputs=ipts, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput(name, LayerType.RANK_COST, parents=parents, size=1) - - -@wrap_name_default() -@layer_support() -def lambda_cost(input, - score, - name, - NDCG_num=5, - max_sort_size=-1, - layer_attr=None): - """ - lambdaCost for the LambdaRank LTR approach. - - The example usage is: - - .. code-block:: python - - cost = lambda_cost(input=input, - score=score, - NDCG_num=8, - max_sort_size=-1) - - :param input: The first input of this layer, which is often a list of document - samples for the same query and whose type must be sequence. - :type input: LayerOutput - :param score: The scores of the samples. - :type score: LayerOutput - :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain), - e.g., 5 for NDCG@5. It must be less than or equal to the - minimum size of the list. - :type NDCG_num: int - :param max_sort_size: The size of partial sorting in calculating gradient. If - max_sort_size is equal to -1 or greater than the number - of the samples in the list, then the algorithm will sort - the entire list to compute the gradient. In other cases, - max_sort_size must be greater than or equal to NDCG_num. - :type max_sort_size: int - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) and isinstance(score, LayerOutput) - if score.size is not None: - assert score.size == 1 - Layer( - name=name, - type=LayerType.LAMBDA_COST, - inputs=[input.name, score.name], - NDCG_num=NDCG_num, - max_sort_size=max_sort_size, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, LayerType.LAMBDA_COST, parents=[input, score], size=1) - - -@wrap_name_default() -@layer_support() -def cross_entropy(input, - label, - name=None, - coeff=1.0, - weight=None, - layer_attr=None): - """ - A loss layer for multi-class cross entropy. - - The example usage is: - - .. code-block:: python - - cost = cross_entropy(input=input_layer, - label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type label: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param weight: The weight layer defines a weight for each sample in the - mini-batch. It is optional. - :type weight: LayerOutput - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object.
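Evaluating the rank cost formula above for one pair (pure Python, for intuition):

.. code-block:: python

    import math

    def pairwise_rank_cost(o_left, o_right, label):
        # C = -P * o + log(1 + exp(o)), with o = o_left - o_right and
        # P in {0, 0.5, 1} (or {0, 1}) encoding the desired order
        o = o_left - o_right
        return -label * o + math.log1p(math.exp(o))

    print(pairwise_rank_cost(0.8, 0.3, 1.0))  # ~0.474, correctly ordered pair
    print(pairwise_rank_cost(0.8, 0.3, 0.0))  # ~0.974, reversed pair costs more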
- :rtype: LayerOutput - """ - - ipts, parents = __cost_input__(input, label, weight) - Layer( - name=name, - type=LayerType.CROSS_ENTROPY, - inputs=ipts, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) - - -@wrap_name_default() -@layer_support() -def cross_entropy_with_selfnorm(input, - label, - name=None, - coeff=1.0, - softmax_selfnorm_alpha=0.1, - layer_attr=None): - """ - A loss layer for multi class entropy with selfnorm. - Input should be a vector of positive numbers, without normalization. - - The example usage is: - - .. code-block:: python - - cost = cross_entropy_with_selfnorm(input=input_layer, - label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param softmax_selfnorm_alpha: The scale factor affects the cost. - :type softmax_selfnorm_alpha: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.CROSS_ENTROPY_WITH_SELFNORM, - inputs=[input.name, label.name], - coeff=coeff, - softmax_selfnorm_alpha=softmax_selfnorm_alpha, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput( - name, - LayerType.CROSS_ENTROPY_WITH_SELFNORM, - parents=[input, label], - size=1) - - -@wrap_name_default() -@layer_support() -def sum_cost(input, name=None, layer_attr=None): - """ - A loss layer which calculates the sum of the input as loss. - - The example usage is: - - .. code-block:: python - - cost = sum_cost(input=input_layer) - - :param input: The input of this layer. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput. - """ - assert isinstance(input, LayerOutput) - Layer( - name=name, - type=LayerType.SUM_COST, - inputs=[input.name], - **ExtraLayerAttribute.to_kwargs(layer_attr)) - - return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1) - - -@wrap_name_default() -@layer_support() -def huber_regression_cost(input, - label, - name=None, - delta=1.0, - coeff=1.0, - layer_attr=None): - """ - In statistics, the Huber loss is a loss function used in robust regression, - that is less sensitive to outliers in data than the squared error loss. - Given a prediction f(x), a label y and :math:`\delta`, the loss function - is defined as: - - .. math:: - - loss = 0.5*(y-f(x))^{2}, | y-f(x) | < \delta - - loss = \delta | y-f(x) | - 0.5 \delta ^2, otherwise - - The example usage is: - - .. code-block:: python - - cost = huber_regression_cost(input=input_layer, label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param delta: The difference between the observed and predicted values. - :type delta: float - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. 
- :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput. - """ - assert isinstance(input, LayerOutput) - Layer( - name=name, - type=LayerType.HUBER_REGRESSION, - inputs=[input.name, label.name], - delta=delta, - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.HUBER_REGRESSION, parents=[input, label], size=1) - - -@wrap_name_default() -@layer_support() -def huber_classification_cost(input, - label, - name=None, - coeff=1.0, - layer_attr=None): - """ - For classification purposes, a variant of the Huber loss called modified Huber - is sometimes used. Given a prediction f(x) (a real-valued classifier score) and - a true binary class label :math:`y\in \{-1, 1 \}`, the modified Huber - loss is defined as: - - .. math: - - loss = \max ( 0, 1-yf(x) )^2, yf(x) \geq -1 - - loss = -4yf(x), otherwise - - The example usage is: - - .. code-block:: python - - cost = huber_classification_cost(input=input_layer, label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput) - if input.size is not None: - assert input.size == 1 - Layer( - name=name, - type=LayerType.HUBER_CLASSIFICATION, - inputs=[input.name, label.name], - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, LayerType.HUBER_CLASSIFICATION, parents=[input, label], size=1) - - -@wrap_name_default() -@layer_support() -def multi_binary_label_cross_entropy(input, - label, - name=None, - coeff=1.0, - layer_attr=None): - """ - A loss layer for multi binary label cross entropy. - - The example usage is: - - .. code-block:: python - - cost = multi_binary_label_cross_entropy(input=input_layer, - label=label_layer) - - :param input: The first input layer. - :type input: LayerOutput - :param label: The input label. - :type input: LayerOutput - :param name: The name of this layer. It is optional. - :type name: basestring - :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default value. - :type coeff: float - :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :return: LayerOutput object. - :rtype: LayerOutput - """ - - if input.activation is None or \ - not isinstance(input.activation, SigmoidActivation): - logger.log(logging.WARN, - ("%s is not a recommended activation for " - "multi_binary_label_cross_entropy, sigmoid is better") % - repr(input.activation)) - - Layer( - name=name, - type=LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, - inputs=[input.name, label.name], - coeff=coeff, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, - parents=[input, label], - size=1) - - -class BeamInput(object): - """ - Define the input for cross_entropy_over_beam layer. 
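Both Huber variants above translate directly into NumPy; a minimal sketch under the stated formulas:

.. code-block:: python

    import numpy as np

    def huber_regression(y, f, delta=1.0):
        # 0.5 * (y - f)**2 inside the delta band, linear outside it
        r = np.abs(y - f)
        return np.where(r < delta, 0.5 * r ** 2, delta * r - 0.5 * delta ** 2)

    def modified_huber(y, f):
        # y in {-1, 1}: max(0, 1 - y * f)**2 when y * f >= -1, else -4 * y * f
        z = y * f
        return np.where(z >= -1, np.maximum(0.0, 1 - z) ** 2, -4 * z)

    print(huber_regression(np.array([3.0]), np.array([0.5])))  # [2.]
    print(modified_huber(np.array([1.0]), np.array([-2.0])))   # [8.]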
- - A beam is made up of a triple: the first one is scores over all - candidates; the second one is indices of top k selected candidates; the - third one is the index of ground truth, which is also always called - gold. - """ - - def __init__(self, candidate_scores, selected_candidates, gold): - assert isinstance(candidate_scores, LayerOutput) - self.candidate_scores = candidate_scores - assert candidate_scores.size == 1 - - assert isinstance(selected_candidates, LayerOutput) - self.selected_candidates = selected_candidates - - assert isinstance(gold, LayerOutput) - self.gold = gold - - -@wrap_name_default() -@layer_support() -def cross_entropy_over_beam(input, name=None): - """ - This layer is used in learning to search models, which is to solve complex - joint prediction problems based on learning to search through a - problem-defined search space. - - Specifically, the learning to search process for this layer begins with - searching a target sequence from a nested sequence. In the first search - step, top beam size sequences with highest scores, indices of these top k - sequences in the original nested sequence, and the ground truth (also - called gold) altogether (a triple) make up of the first beam. - - Then, several special positions, for example, start and end positions - that define meaningful segments are searched. In these searches, top k - positions with highest scores are selected, and then sequence, starting - from the selected starts till ends of the sequences (or a fixed position) - are taken to search next. - - We call the possible top k results returned in one search the beam. This - search process can be repeated for pre-defined turns and leads to several - beam expansions. - - Finally, the layer cross_entropy_over_beam takes all the beam expansions - which contain several candidate targets found along the multi-step search. - cross_entropy_over_beam calculates cross entropy over the expanded beams - which all the candidates in the beam as the normalized factor. - - Note that, if gold falls off the beam at search step t, then the cost is - calculated over the beam at step t. - - This cost layer always works together with kmax_seq_score_layer, - sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a - sub-search space. - - - The example usage is: - - .. code-block:: python - - cost = cross_entropy_over_beam(input=[ - BeamInput( - candidate_scores=beam1_candidates, - selected_candidates=beam1_topk, - gold=gold1), - BeamInput( - candidate_scores=beam2_candidates, - selected_candidates=beam2_topk, - gold=gold2), - ]) - - - :param input: Input beams for this layer. - :type input: BeamInput - :param name: The name of this layer. It is optional. - :type name: basestring - :return: LayerOutput object. 
-class BeamInput(object):
- """
- Define the input for cross_entropy_over_beam layer.
-
- A beam is made up of a triple: the first one is the scores over all
- candidates; the second one is the indices of the top k selected candidates;
- the third one is the index of the ground truth, which is often called
- the gold.
- """
-
- def __init__(self, candidate_scores, selected_candidates, gold):
- assert isinstance(candidate_scores, LayerOutput)
- self.candidate_scores = candidate_scores
- assert candidate_scores.size == 1
-
- assert isinstance(selected_candidates, LayerOutput)
- self.selected_candidates = selected_candidates
-
- assert isinstance(gold, LayerOutput)
- self.gold = gold
-
-
-@wrap_name_default()
-@layer_support()
-def cross_entropy_over_beam(input, name=None):
- """
- This layer is used in learning-to-search models, which solve complex
- joint prediction problems by learning to search through a
- problem-defined search space.
-
- Specifically, the learning-to-search process for this layer begins with
- searching a target sequence from a nested sequence. In the first search
- step, the top beam_size sequences with the highest scores, the indices of
- these top k sequences in the original nested sequence, and the ground
- truth (also called the gold) together (a triple) make up the first beam.
-
- Then, several special positions, for example, start and end positions
- that define meaningful segments, are searched. In these searches, the top
- k positions with the highest scores are selected, and the subsequences
- from the selected starts to the ends of the sequences (or to a fixed
- position) are taken for the next search.
-
- We call the possible top k results returned in one search the beam. This
- search process can be repeated for pre-defined turns and leads to several
- beam expansions.
-
- Finally, the layer cross_entropy_over_beam takes all the beam expansions,
- which contain several candidate targets found along the multi-step search.
- cross_entropy_over_beam calculates the cross entropy over the expanded
- beams, with all the candidates in the beam as the normalization factor.
-
- Note that if the gold falls off the beam at search step t, then the cost
- is calculated over the beam at step t.
-
- This cost layer always works together with kmax_seq_score_layer,
- sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a
- sub-search space.
-
-
- The example usage is:
-
- .. code-block:: python
-
- cost = cross_entropy_over_beam(input=[
- BeamInput(
- candidate_scores=beam1_candidates,
- selected_candidates=beam1_topk,
- gold=gold1),
- BeamInput(
- candidate_scores=beam2_candidates,
- selected_candidates=beam2_topk,
- gold=gold2),
- ])
-
-
- :param input: Input beams for this layer.
- :type input: BeamInput | list of BeamInput
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
-
- if isinstance(input, BeamInput):
- input = [input]
- else:
- assert isinstance(input, list), (
- 'input for cross_entropy_over_beam should be a python list '
- 'of BeamInput objects.')
- for ipt in input:
- assert isinstance(ipt, BeamInput), (
- 'input for cross_entropy_over_beam '
- 'should be a BeamInput object.')
-
- ipts = []
- parents = []
- for beam in input:
- parents += [beam.candidate_scores, beam.selected_candidates, beam.gold]
- ipts += [
- beam.candidate_scores.name, beam.selected_candidates.name,
- beam.gold.name
- ]
-
- Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts)
- return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
-
-
-@wrap_name_default()
-@layer_support()
-def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
- """
- This is an L1 loss, but smoother near zero. It requires that the
- sizes of input and label are equal. The formula is as follows,
-
- .. math::
-
- L = \sum_{i} smooth_{L1}(input_i - label_i)
-
- in which
-
- .. math::
-
- smooth_{L1}(x) = \\begin{cases} 0.5x^2& \\text{if} \\ |x| < 1 \\\\ |x|-0.5& \\text{otherwise} \end{cases}
-
- Reference:
- `Fast R-CNN
- `_
-
- The example usage is:
-
- .. code-block:: python
-
- cost = smooth_l1_cost(input=input_layer,
- label=label_layer)
-
- :param input: The input layer.
- :type input: LayerOutput
- :param label: The input label.
- :type label: LayerOutput
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param coeff: The weight of the gradient in the back propagation.
- 1.0 is the default value.
- :type coeff: float
- :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
- details.
- :type layer_attr: ExtraLayerAttribute
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
- assert isinstance(input, LayerOutput)
- assert isinstance(label, LayerOutput)
- assert input.size == label.size
-
- Layer(
- name=name,
- type=LayerType.SMOOTH_L1,
- inputs=[input.name, label.name],
- coeff=coeff,
- **ExtraLayerAttribute.to_kwargs(layer_attr))
- return LayerOutput(
- name, LayerType.SMOOTH_L1, parents=[input, label], size=1)
-
-
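The smooth L1 formula above is quadratic for |x| < 1 and linear elsewhere; a
minimal NumPy sketch (illustrative only, not a PaddlePaddle API):

.. code-block:: python

    import numpy as np

    def smooth_l1(x, y):
        d = np.abs(x - y)
        return float(np.where(d < 1, 0.5 * d ** 2, d - 0.5).sum())
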
-@wrap_name_default()
-def multiplex_layer(input, name=None, layer_attr=None):
- """
- This layer multiplexes multiple input layers according to the indices
- provided by the first input layer.
- inputs[0]: the indices of the layers to form the output, of size batchSize.
- inputs[1:N]: the candidate output data.
- For each index i from 0 to batchSize - 1, the i-th row of the output is
- the same as the i-th row of the (index[i] + 1)-th layer.
-
- For the i-th row of the output:
-
- .. math::
-
- y[i][j] = x_{x_{0}[i] + 1}[i][j], j = 0,1, ... , (x_{1}.width - 1)
-
- where y is the output. :math:`x_{k}` is the k-th input layer and
- :math:`k = x_{0}[i] + 1`.
-
- The example usage is:
-
- .. code-block:: python
-
- multiplex = multiplex_layer(input=layers)
-
- :param input: Input layers.
- :type input: list of LayerOutput
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
- details.
- :type layer_attr: ExtraLayerAttribute
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
-
- assert isinstance(input, collections.Sequence)
- assert len(input) > 2, 'multiplex_layer should have more than 2 inputs'
- for i in range(1, len(input)):
- assert isinstance(input[i], LayerOutput)
- assert input[i].size == input[1].size, \
- "All the input layers except the first one should have the same size"
-
- l = Layer(
- name=name,
- type='multiplex',
- inputs=[x.name for x in input],
- size=input[1].size,
- **ExtraLayerAttribute.to_kwargs(layer_attr))
- return LayerOutput(
- name=name,
- layer_type=LayerType.MULTIPLEX_LAYER,
- parents=input,
- size=l.config.size)
-
-
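The row-selection semantics of multiplex_layer can be pictured with a small
NumPy sketch (illustrative only; `multiplex` below is not part of this module):

.. code-block:: python

    import numpy as np

    def multiplex(index, candidates):
        # index: an int vector of length batch_size, one entry per output row;
        # candidates: the input layers after the index layer, each of shape
        # [batch_size, width], so candidates[index[i]] is layer (index[i] + 1).
        return np.stack([candidates[k][i] for i, k in enumerate(index)])
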
-@wrap_name_default("dropout")
-def dropout_layer(input, dropout_rate, name=None):
- """
- A dropout layer, which randomly zeroes input elements with probability
- dropout_rate at training time. It is implemented on top of addto_layer
- with a dropout attribute.
-
- The example usage is:
-
- .. code-block:: python
-
- dropout = dropout_layer(input=input_layer, dropout_rate=0.5)
-
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param input: The input of this layer.
- :type input: LayerOutput
- :param dropout_rate: The probability of dropout.
- :type dropout_rate: float
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
- return addto_layer(
- name=name,
- input=input,
- act=LinearActivation(),
- bias_attr=False,
- layer_attr=ExtraAttr(drop_rate=dropout_rate))
-
-
-@wrap_name_default()
-@wrap_act_default(act=LinearActivation())
-@wrap_param_attr_default()
-@layer_support(DROPOUT)
-def row_conv_layer(input,
- context_len,
- act=None,
- name=None,
- param_attr=None,
- layer_attr=None):
- """
- The row convolution is also called lookahead convolution. It was first
- introduced in the paper `Deep Speech 2: End-to-End Speech Recognition
- in English and Mandarin `_ .
-
- A bidirectional RNN learns a representation for a sequence by
- performing a forward and a backward pass through the entire sequence.
- However, unlike unidirectional RNNs, bidirectional RNNs are challenging
- to deploy in an online and low-latency setting. The lookahead convolution
- incorporates information from future subsequences in a computationally
- efficient manner to improve unidirectional RNNs.
-
- The connection of row convolution differs from the 1D sequence
- convolution. Suppose the future context length is k; the output at
- time step t is then computed from the input features from the t-th
- time step to the (t+k)-th time step. Suppose the hidden dimension of the
- input activations is d; the activation of the new layer at time step t is:
-
- .. math::
-
- r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
- \quad \\text{for} \quad (1 \leq i \leq d)
-
- Note:
- The `context_len` is `k + 1`. That is to say, the lookahead step
- number plus one equals context_len.
-
-
- .. code-block:: python
-
- row_conv = row_conv_layer(input=input_layer, context_len=3)
-
-
- :param input: The input of this layer.
- :type input: LayerOutput
- :param context_len: The context length equals the lookahead step number
- plus one.
- :type context_len: int
- :param act: Activation Type. LinearActivation is the default activation.
- :type act: BaseActivation
- :param param_attr: The parameter attribute. See ParameterAttribute for
- details.
- :type param_attr: ParameterAttribute
- :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
- details.
- :type layer_attr: ExtraLayerAttribute | None
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
- assert isinstance(input, LayerOutput)
- assert context_len > 0, "the context_len must be greater than 0."
-
- Layer(
- inputs=[Input(input.name, **param_attr.attr)],
- name=name,
- context_length=context_len,
- type=LayerType.ROW_CONV_LAYER,
- active_type=act.name,
- **ExtraLayerAttribute.to_kwargs(layer_attr))
- return LayerOutput(
- name, LayerType.ROW_CONV_LAYER, input, activation=act, size=input.size)
-
-
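Assuming the lookahead window is simply truncated at the end of the sequence,
the sum above can be sketched in NumPy as follows (illustrative only; the
boundary handling of the real layer may differ):

.. code-block:: python

    import numpy as np

    def row_conv(h, w):
        # h: [T, d] input activations; w: [context_len, d] lookahead weights
        out = np.zeros_like(h)
        for t in range(h.shape[0]):
            window = h[t:t + w.shape[0]]
            out[t] = (window * w[:window.shape[0]]).sum(axis=0)
        return out
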
-@layer_support()
-@wrap_name_default()
-def prelu_layer(input,
- name=None,
- partial_sum=1,
- channel_shared=None,
- num_channels=None,
- param_attr=None,
- layer_attr=None):
- """
- The Parametric ReLU activation, which scales negative inputs with a
- learnable weight.
-
- Reference:
- `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
- ImageNet Classification `_
-
- .. math::
-
- prelu(z_i) = \\begin{cases} z_i, & \\text{if} \\ z_i > 0 \\\\ a_i z_i, & \\text{otherwise} \\end{cases}
-
- The example usage is:
-
- .. code-block:: python
-
- prelu = prelu_layer(input=layers, partial_sum=1)
-
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param input: The input of this layer.
- :type input: LayerOutput
- :param partial_sum: This parameter makes a group of inputs share the same weight.
-
- - partial_sum = 1, indicates the element-wise activation: each element has a weight.
- - partial_sum = number of elements in one channel, indicates the channel-wise activation, elements in a channel share the same weight.
- - partial_sum = number of outputs, indicates all elements share the same weight.
-
- :type partial_sum: int
- :param channel_shared: Whether or not the parameters are shared across channels.
-
- - channel_shared = True, we set the partial_sum to the number of outputs.
- - channel_shared = False, we set the partial_sum to the number of elements in one channel.
-
- :type channel_shared: bool
- :param num_channels: The number of input channels.
- :type num_channels: int
- :param param_attr: The parameter attribute. See ParameterAttribute for details.
- :type param_attr: ParameterAttribute
- :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
- details.
- :type layer_attr: ExtraLayerAttribute | None
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
-
- assert isinstance(input, LayerOutput), 'prelu_layer accepts only one input.'
-
- if not param_attr:
- param_attr = ParamAttr(initial_mean=0.25, initial_std=0.0)
- else:
- assert isinstance(param_attr, ParameterAttribute)
-
- if num_channels is None:
- assert input.num_filters is not None, \
- 'the input channels cannot be detected, please specify the num_channels parameter'
- num_channels = input.num_filters
-
- if channel_shared is not None:
- assert isinstance(channel_shared, bool)
- assert (input.height != 0 and input.width != 0), \
- 'input height and width must be set'
- if channel_shared:
- partial_sum = input.height * input.width * num_channels
- else:
- partial_sum = input.height * input.width
-
- l = Layer(
- name=name,
- type=LayerType.PRELU,
- inputs=Input(input.name, **param_attr.attr),
- partial_sum=partial_sum,
- **ExtraLayerAttribute.to_kwargs(layer_attr))
- return LayerOutput(
- name=name,
- layer_type=LayerType.PRELU,
- parents=input,
- num_filters=num_channels,
- size=l.config.size)
-
-
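Element-wise, the PReLU formula above reduces to a one-liner; a NumPy sketch
(illustrative only; the weight `a` broadcasts per element, per channel, or
globally depending on partial_sum):

.. code-block:: python

    import numpy as np

    def prelu(z, a):
        # a: learnable negative-slope weight(s), broadcast against z
        return np.where(z > 0, z, a * z)
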
-@wrap_name_default()
-@layer_support(ERROR_CLIPPING, DROPOUT)
-@wrap_act_default(act=LinearActivation())
-def gated_unit_layer(input,
- size,
- act=None,
- name=None,
- gate_attr=None,
- gate_param_attr=None,
- gate_bias_attr=True,
- inproj_attr=None,
- inproj_param_attr=None,
- inproj_bias_attr=True,
- layer_attr=None):
- """
- The gated unit layer implements a simple gating mechanism over the input.
- The input :math:`X` is first projected into a new space :math:`X'`, and
- it is also used to produce a gate weight :math:`\sigma`. The element-wise
- product between :math:`X'` and :math:`\sigma` is finally returned.
-
- Reference:
- `Language Modeling with Gated Convolutional Networks
- `_
-
- .. math::
-
- y=\\text{act}(X \cdot W + b)\otimes \sigma(X \cdot V + c)
-
- The example usage is:
-
- .. code-block:: python
-
- gated_unit = gated_unit_layer(size=128, input=input_layer)
-
- :param input: The input of this layer.
- :type input: LayerOutput
- :param size: The dimension of this layer's output.
- :type size: int
- :param act: Activation type of the projection. LinearActivation is the default
- activation.
- :type act: BaseActivation
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param gate_attr: The extra layer attribute of the gate. See ExtraLayerAttribute for
- details.
- :type gate_attr: ExtraLayerAttribute | None
- :param gate_param_attr: The parameter attribute of the gate. See ParameterAttribute
- for details.
- :type gate_param_attr: ParameterAttribute
- :param gate_bias_attr: The bias attribute of the gate. If this parameter is set to False or
- an object whose type is not ParameterAttribute, no bias is defined.
- If this parameter is set to True, the bias is initialized to zero.
- :type gate_bias_attr: ParameterAttribute | bool | None | Any
- :param inproj_attr: Extra layer attributes of the projection. See ExtraLayerAttribute for
- details.
- :type inproj_attr: ExtraLayerAttribute | None
- :param inproj_param_attr: The parameter attribute of the projection. See ParameterAttribute
- for details.
- :type inproj_param_attr: ParameterAttribute
- :param inproj_bias_attr: The bias attribute of the projection. If this parameter is set to False
- or an object whose type is not ParameterAttribute, no bias is defined.
- If this parameter is set to True, the bias is initialized to zero.
- :type inproj_bias_attr: ParameterAttribute | bool | None | Any
- :param layer_attr: Extra layer attribute of the product. See ExtraLayerAttribute for
- details.
- :type layer_attr: ExtraLayerAttribute | None
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
-
- assert isinstance(
- input, LayerOutput), 'The gated linear unit accepts only one input.'
-
- input_proj = fc_layer(
- input=input,
- name="%s_input_proj" % name,
- size=size,
- act=act,
- layer_attr=inproj_attr,
- param_attr=inproj_param_attr,
- bias_attr=inproj_bias_attr)
-
- gate = fc_layer(
- size=size,
- name="%s_gate" % name,
- act=SigmoidActivation(),
- input=input,
- layer_attr=gate_attr,
- param_attr=gate_param_attr,
- bias_attr=gate_bias_attr)
- return mixed_layer(
- name="%s_gated_act" % name,
- input=dotmul_operator(input_proj, gate),
- layer_attr=layer_attr)
-
-
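The gating formula above combines two projections of the same input; a NumPy
sketch (illustrative only; `act` is taken to be the identity, matching the
default LinearActivation):

.. code-block:: python

    import numpy as np

    def gated_unit(x, w, b, v, c):
        # y = act(x.W + b) * sigmoid(x.V + c)
        gate = 1.0 / (1.0 + np.exp(-(x.dot(v) + c)))
        return (x.dot(w) + b) * gate
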
-@layer_support()
-@wrap_name_default('switch_order')
-def switch_order_layer(input,
- name=None,
- reshape_axis=None,
- act=None,
- layer_attr=None):
- """
- This layer switches the dimension order of the image input.
- From order "batchSize, channels, height, width"
- to order "batchSize, height, width, channels".
-
- The example usage is:
-
- .. code-block:: python
-
- reshape_axis = 3
- switch = switch_order_layer(input=layer, name='switch', reshape_axis=reshape_axis)
- # equivalent to reshape = {'height': [0, 1, 2], 'width': [3]}
-
- :param input: The input of this layer.
- :type input: LayerOutput
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param reshape_axis: Specify the axes of 'height'. Its value should be positive and less than 4.
- :type reshape_axis: int
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
- assert isinstance(input, LayerOutput)
- assert reshape_axis is not None and 0 < reshape_axis < 4
- height = [ele for ele in xrange(reshape_axis)]
- width = [ele for ele in range(reshape_axis, 4)]
- reshape = {'height': height, 'width': width}
-
- l = Layer(
- name=name,
- inputs=input.name,
- reshape=reshape,
- type=LayerType.SWITCH_ORDER_LAYER,
- active_type=act.name,
- **ExtraLayerAttribute.to_kwargs(layer_attr))
- return LayerOutput(
- name=name,
- layer_type=LayerType.SWITCH_ORDER_LAYER,
- activation=act,
- parents=input,
- size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
- """
- This layer crops images according to the offset and shape. Users can set
- the crop shape through the argument 'shape' explicitly or by specifying a
- reference input layer.
-
- The example usage is:
-
- .. code-block:: python
-
- crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3])
-
- :param input: The input of this layer. If two inputs are given, the second one
- will be regarded as the reference.
- The input must be a 4-D tensor in NCHW order.
- :type input: LayerOutput | Sequence
- :param offset: The crop offset.
- :type offset: Sequence
- :param axis: The start axis to be cropped. For the image input layer:
- - 0: batch size
- - 1: channels
- - 2: height
- - 3: width
- :type axis: int
- :param shape: The shape to be cropped to. Default is None.
- :type shape: Sequence | None
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
- if isinstance(input, LayerOutput):
- input = [input]
- else:
- assert isinstance(input, collections.Sequence)
- l = Layer(
- inputs=[x.name for x in input],
- axis=axis,
- offset=offset,
- shape=shape,
- name=name,
- type=LayerType.CROP_LAYER,
- **ExtraLayerAttribute.to_kwargs(layer_attr))
- return LayerOutput(
- name=name,
- layer_type=LayerType.CROP_LAYER,
- parents=input,
- size=l.config.size)
-
-
-@wrap_name_default()
-@layer_support()
-def sub_nested_seq_layer(input, selected_indices, name=None):
- """
- The sub_nested_seq_layer accepts two inputs: the first one is a nested
- sequence; the second one is a set of selected indices in the nested sequence.
-
- Then sub_nested_seq_layer trims the first nested sequence input according
- to the selected indices to form a new output. This layer is useful in
- beam training.
-
- The example usage is:
-
- .. code-block:: python
-
- sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids)
-
-
- :param input: The input of this layer. It is a nested sequence.
- :type input: LayerOutput
- :param selected_indices: A set of sequence indices in the nested sequence.
- :type selected_indices: LayerOutput
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
-
- assert isinstance(input, LayerOutput), (
- 'The first input of '
- 'sub_nested_seq_layer must be a Paddle layer.')
- assert isinstance(selected_indices, LayerOutput), (
- 'The second input of '
- 'sub_nested_seq_layer must be a Paddle layer.')
-
- l = Layer(
- inputs=input.name,
- selected_indices=selected_indices.name,
- name=name,
- type=LayerType.SUB_NESTED_SEQ)
- return LayerOutput(
- name=name,
- layer_type=LayerType.SUB_NESTED_SEQ,
- parents=input,
- size=l.config.size)
-
-
-@wrap_name_default("clip")
-def clip_layer(input, min, max, name=None):
- """
- A layer for clipping the input value to the interval [min, max].
-
- .. math::
-
- out[i] = \min (\max (in[i], min), max)
-
- .. code-block:: python
-
- clip = clip_layer(input=input_layer, min=-10, max=10)
-
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param input: The input of this layer.
- :type input: LayerOutput
- :param min: The lower threshold for clipping.
- :type min: float
- :param max: The upper threshold for clipping.
- :type max: float
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
- Layer(
- name=name,
- type=LayerType.CLIP_LAYER,
- inputs=[input.name],
- min=min,
- max=max)
- return LayerOutput(
- name, LayerType.CLIP_LAYER, parents=[input], size=input.size)
-
-
-@wrap_name_default()
-def seq_slice_layer(input, starts, ends, name=None):
- """
- seq_slice_layer will return one or several sub-sequences from the
- input sequence layer given start and end indices.
-
- - If only start indices are given, and end indices are set to None,
- this layer slices the input sequence from the given start indices
- to its end.
- - If only end indices are given, and start indices are set to None,
- this layer slices the input sequence from its beginning to the
- given end indices.
- - If start and end indices are both given, they should have the same
- number of elements.
-
- If the start or end indices contain more than one element, the input
- sequence will be sliced multiple times.
-
-
- .. code-block:: python
-
- seq_slice = seq_slice_layer(input=input_seq,
- starts=start_pos, ends=end_pos)
-
- :param name: The name of this layer. It is optional.
- :type name: basestring
- :param input: The input of this layer, which should be a sequence.
- :type input: LayerOutput
- :param starts: The start indices to slice the input sequence.
- :type starts: LayerOutput | None
- :param ends: The end indices to slice the input sequence.
- :type ends: LayerOutput | None
- :return: LayerOutput object.
- :rtype: LayerOutput - """ - - assert isinstance(input, LayerOutput), ( - 'The first input of seq_slice layer must be a PaddlePaddle layer.') - - if starts is not None: - assert isinstance(starts, LayerOutput), ( - 'The start indices for seq_slice layer ' - 'must be a PaddlePaddle layer.') - if ends is not None: - assert isinstance(ends, LayerOutput), ( - 'The end indices for seq_slice layer must be a PaddlePaddle layer.') - assert starts is not None or ends is not None, ( - 'start and end indices ' - 'cannot be set to None at the same time, at least one of ' - 'them should be given.') - if starts is not None and ends is not None: - assert starts.size == ends.size, ( - 'If start and end indices are both given to seq_slice_layer, ' - 'they should have the same width.') - - Layer( - name=name, - type=LayerType.SEQ_SLICE, - inputs=input.name, - starts=starts.name if starts is not None else None, - ends=ends.name if ends is not None else None) - return LayerOutput( - name, LayerType.SEQ_SLICE, parents=[input], size=input.size) - - -@wrap_name_default() -@layer_support() -def kmax_seq_score_layer(input, name=None, beam_size=1): - """ - This layer accepts one input which is scores over a sequence or a nested - sequence, and returns indices of beam_size sequences with highest scores. - - .. code-block:: python - - kmax_indices = kmax_seq_score_layer(input=input_layer, beam_size) - - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. It stores scores over a sequence or - a nested sequence and its size must be 1. - :type input: LayerOutput - :param beam_size: The indices of the sequences with top beam_size scores are returned. - :type beam_size: int - :return: LayerOutput object. - :rtype: LayerOutput - """ - assert isinstance(input, LayerOutput), ("kmax_seq_score_layer " - "accepts only one input.") - assert input.size == 1, ( - "input of kmax_seq_score_layer is a score " - "over a sequence or a nested sequence, so its width must be 1.") - - Layer( - name=name, - type=LayerType.KMAX_SEQ_SCORE, - inputs=[input.name], - beam_size=beam_size) - - return LayerOutput( - name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size) - - -@wrap_name_default("conv3d") -@wrap_param_attr_default() -@wrap_bias_attr_default() -@wrap_act_default(act=ReluActivation()) -@layer_support(DROPOUT) -def img_conv3d_layer(input, - filter_size, - num_filters, - name=None, - num_channels=None, - act=None, - groups=1, - stride=1, - padding=0, - bias_attr=None, - param_attr=None, - shared_biases=True, - layer_attr=None, - trans=False, - layer_type=None): - """ - - The example usage is: - - .. code-block:: python - - conv = img_conv3d_layer(input=data, filter_size=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param filter_size: The dimensions of the filter kernel along three axises. If the parameter - is set to one integer, the three dimensions will be same. - :type filter_size: int | tuple | list - :param num_filters: The number of filters. It is as same as the output image channel. - :type num_filters: int - :param act: Activation type. ReluActivation is the default activation. - :type act: BaseActivation - :param groups: The number of the filter groups. - :type groups: int - :param stride: The strides of the convolution along three axises. 
If the parameter - is set to one integer, the three strides will be same. - :type stride: int | tuple | list - :param padding: The numbers of padding along three axises. If the parameter is set to - one integer, they will be same. - :type padding: int | tuple | list - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :param num_channels: The number of input channels. If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input. - :type num_channels: int - :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param shared_biases: Whether biases will be shared between filters or not. - :type shared_biases: bool - :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for - details. - :type layer_attr: ExtraLayerAttribute - :param trans: True if it is a convTransLayer, False if it is a convLayer - :type trans: bool - :param layer_type: Specify the layer type. If the parameter is set, it must be "deconv3d" - when trans=True. If not set, it will be automatically set to "deconv3d" - when trans=True and "conv3d" when trans=False. - :type layer_type: basestring - :return: LayerOutput object. - :rtype: LayerOutput - """ - if num_channels is None: - assert input.num_filters is not None - num_channels = input.num_filters - - if isinstance(filter_size, collections.Sequence): - assert len(filter_size) == 3 - filter_size, filter_size_y, filter_size_z = filter_size - else: - filter_size_y = filter_size - filter_size_z = filter_size - - if isinstance(stride, collections.Sequence): - assert len(stride) == 3 - stride, stride_y, stride_z = stride - else: - stride_y = stride - stride_z = stride - - if isinstance(padding, collections.Sequence): - assert len(padding) == 3 - padding, padding_y, padding_z = padding - else: - padding_y = padding - padding_z = padding - - if param_attr.attr.get('initial_smart'): - # special initial for conv layers. - init_w = (2.0 / (filter_size**2 * num_channels))**0.5 - param_attr.attr["initial_mean"] = 0.0 - param_attr.attr["initial_std"] = init_w - param_attr.attr["initial_strategy"] = 0 - param_attr.attr["initial_smart"] = False - - if layer_type: - if trans: - assert layer_type in ["deconv3d"] - lt = layer_type - else: - lt = LayerType.DECONV3D_LAYER if trans else LayerType.CONV3D_LAYER - - l = Layer( - name=name, - inputs=Input( - input.name, - conv=Conv3D( - filter_size=filter_size, - padding=padding, - stride=stride, - channels=num_channels, - groups=groups, - filter_size_y=filter_size_y, - padding_y=padding_y, - stride_y=stride_y, - filter_size_z=filter_size_z, - padding_z=padding_z, - stride_z=stride_z), - **param_attr.attr), - active_type=act.name, - num_filters=num_filters, - bias=ParamAttr.to_bias(bias_attr), - shared_biases=shared_biases, - type=lt, - **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput( - name, - lt, - parents=[input], - activation=act, - num_filters=num_filters, - size=l.config.size) - - -@wrap_name_default("scale_shift") -@wrap_param_attr_default() -@wrap_bias_attr_default() -def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): - """ - A layer applies a linear transformation to each element in each row of - the input matrix. 
For each element, the layer first re-scales it and then - adds a bias to it. - - This layer is very like the SlopeInterceptLayer, except the scale and - bias are trainable. - - .. math:: - - y = w * x + b - - .. code-block:: python - - scale_shift = scale_shift_layer(input=input_layer, bias_attr=False) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer. - :type input: LayerOutput - :param param_attr: The parameter attribute of scaling. See ParameterAttribute for - details. - :type param_attr: ParameterAttribute - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :return: LayerOutput object. - :rtype: LayerOutput - """ - Layer( - name=name, - type=LayerType.SCALE_SHIFT_LAYER, - inputs=Input(input.name, **param_attr.attr), - bias=ParamAttr.to_bias(bias_attr)) - return LayerOutput( - name, LayerType.SCALE_SHIFT_LAYER, parents=[input], size=input.size) - - -@wrap_name_default("resize") -def resize_layer(input, size, name=None): - """ - The resize layer resizes the input matrix with a shape of [Height, Width] - into the output matrix with a shape of [Height x Width / size, size], - where size is the parameter of this layer indicating the output dimension. - - :param input: The input of this layer. - :type input: LayerOutput. - :param name: The name of this layer. It is optional. - :type name: basestring - :param size: The resized output dimension of this layer. - :type size: int - :return: A LayerOutput object. - :rtype: LayerOutput - """ - Layer(name=name, type=LayerType.RESIZE, inputs=Input(input.name), size=size) - return LayerOutput(name, LayerType.RESIZE, parents=[input], size=input.size) - - -@wrap_act_default(act=LinearActivation()) -@wrap_name_default('sub_seq') -def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None): - """ - sub_seq_layer will return sub-sequences from the input sequences. For each - sequence in the input sequence layer, sub_seq_layer will slice it by given - offset and size. Please notice that, number of offset value and size value - both are equal to the number of sequence in the input layer. - - .. code-block:: python - - sub_seq = sub_seq_layer(input=input_seq, offsets=offsets, sizes=sizes) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer, which should be sequence. - :type input: LayerOutput - :param offsets: The offset indices to slice the input sequence, which should - be sequence type. - :type offsets: LayerOutput - :param sizes: The sizes of the sub-sequences, which should be sequence type. - :type sizes: LayerOutput - :param act: Activation type, LinearActivation is the default activation. - :type act: BaseActivation. - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any - :return: LayerOutput object. 
- :rtype: LayerOutput - """ - - assert isinstance(input, LayerOutput), ( - 'The first input of sub_seq_layer layer must be a PaddlePaddle layer.') - assert isinstance(offsets, LayerOutput), ( - 'The offset indices for sub_seq_layer, ' - 'must be a PaddlePaddle layer.') - assert isinstance(sizes, LayerOutput), ( - 'The sizes of sub-sequences, must be a PaddlePaddle layer.') - - Layer( - name=name, - type=LayerType.SUB_SEQ_LAYER, - inputs=[input.name, offsets.name, sizes.name], - active_type=act.name, - bias=ParamAttr.to_bias(bias_attr)) - - return LayerOutput( - name, - LayerType.SUB_SEQ_LAYER, - parents=[input, offsets, sizes], - size=input.size) - - -@wrap_name_default('scale_sub_region') -def scale_sub_region_layer(input, indices, value, name=None): - """ - Given an image or feature map with CHW information, scale_sub_region_layer - can be used to multiply a real value to values of a sub continuous region. - You can provide start and end indices of CHW for each instance. - Please notice that all start indices are counting from 1. - The shape of indices should be [batch_size, 6] and the layout for each row - is [C_Start, C_End, H_Start, H_End, W_Start, W_End]. - - .. code-block:: python - - scale_sub_region = scale_sub_region_layer(input=input, - indices=indices, - value=value) - - :param name: The name of this layer. It is optional. - :type name: basestring - :param input: The input of this layer which should contains CHW information. - :type input: LayerOutput - :param indices: Start index and end index for C H W, the input value should - be a 2-D matrix with shape [batch_size, 6]. - :type indices: LayerOutput. - :param value: value to multiply. - :type value: float - :return: LayerOutput object. - :rtype: LayerOutput - """ - - assert isinstance(input, LayerOutput), ( - 'The first input of scale_sub_region_layer, ' - 'must be a PaddlePaddle layer.') - assert isinstance(indices, LayerOutput), ( - 'The start and end indices for CHW, must be a PaddlePaddle layer.') - assert isinstance(value, float), ( - 'The value to multiply, must be a real value.') - - Layer( - name=name, - type=LayerType.SCALE_SUB_REGION_LAYER, - inputs=[input.name, indices.name], - value=value) - - return LayerOutput( - name, - LayerType.SCALE_SUB_REGION_LAYER, - parents=[input, indices], - num_filters=input.num_filters, - size=input.size) - - -@wrap_name_default() -@wrap_act_default(act=LinearActivation()) -@wrap_param_attr_default() -@layer_support() -def factorization_machine(input, - factor_size, - act=None, - name=None, - param_attr=None, - layer_attr=None): - """ - The Factorization Machine models pairwise feature interactions as inner - product of the learned latent vectors corresponding to each input feature. - The Factorization Machine can effectively capture feature interactions - especially when the input is sparse. - - This implementation only consider the 2-order feature interactions using - Factorization Machine with the formula: - - .. math:: - y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \\rangle x_i x_j - - Note: - X is the input vector with size n. V is the factor matrix. Each row of V - is the latent vector corresponding to each input dimesion. The size of - each latent vector is k. - - For details of Factorization Machine, please refer to the paper: - Factorization machines. - - .. 
code-block:: python
-
- first_order = paddle.layer.fc(input=input,
- size=1,
- act=paddle.activation.Linear())
- second_order = paddle.layer.factorization_machine(input=input,
- factor_size=10)
- fm = paddle.layer.addto(input=[first_order, second_order],
- act=paddle.activation.Linear(),
- bias_attr=False)
-
- :param input: The input layer. Supported input types: all input data types
- on CPU, and only dense input types on GPU.
- :type input: LayerOutput
- :param factor_size: The hyperparameter that defines the dimensionality of
- the latent vectors.
- :type factor_size: int
- :param act: Activation Type. Default is linear activation.
- :type act: BaseActivation
- :param param_attr: The parameter attribute. See ParameterAttribute for
- details.
- :type param_attr: ParameterAttribute
- :param layer_attr: Extra Layer config.
- :type layer_attr: ExtraLayerAttribute | None
- :return: LayerOutput object.
- :rtype: LayerOutput
- """
- assert isinstance(input, LayerOutput)
- assert factor_size > 0, "the factor_size must be greater than 0."
-
- Layer(
- inputs=[Input(input.name, **param_attr.attr)],
- name=name,
- factor_size=factor_size,
- type=LayerType.FACTORIZATION_MACHINE,
- active_type=act.name,
- **ExtraLayerAttribute.to_kwargs(layer_attr))
- return LayerOutput(
- name, LayerType.FACTORIZATION_MACHINE, input, activation=act, size=1)
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
deleted file mode 100644
index b5cde7bac7..0000000000
--- a/python/paddle/trainer_config_helpers/networks.py
+++ /dev/null
@@ -1,1813 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import math - -from activations import LinearActivation, ReluActivation, SoftmaxActivation, \ - IdentityActivation, TanhActivation, SequenceSoftmaxActivation -from attrs import ExtraAttr -from default_decorators import wrap_name_default, wrap_act_default, \ - wrap_param_default, wrap_bias_attr_default, wrap_param_attr_default -from layers import * # There are too many layers used in network, so import * -from poolings import MaxPooling, SumPooling -from paddle.trainer.config_parser import * - -__all__ = [ - 'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool", - "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg', - 'img_conv_group', 'img_separable_conv', 'vgg_16_network', 'gru_unit', - 'gru_group', 'simple_gru', 'simple_attention', 'dot_product_attention', - 'multi_head_attention', 'simple_gru2', 'bidirectional_gru', - 'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs' -] - -###################################################### -# Text CNN # -###################################################### - - -@wrap_name_default("sequence_conv_pooling") -def sequence_conv_pool(input, - context_len, - hidden_size, - name=None, - context_start=None, - pool_type=None, - context_proj_layer_name=None, - context_proj_param_attr=False, - fc_layer_name=None, - fc_param_attr=None, - fc_bias_attr=None, - fc_act=None, - pool_bias_attr=None, - fc_attr=None, - context_attr=None, - pool_attr=None): - """ - Text convolution pooling group. - - Text input => Context Projection => FC Layer => Pooling => Output. - - :param name: group name. - :type name: basestring - :param input: input layer. - :type input: LayerOutput - :param context_len: context projection length. See - context_projection's document. - :type context_len: int - :param hidden_size: FC Layer size. - :type hidden_size: int - :param context_start: context start position. See - context_projection's context_start. - :type context_start: int|None - :param pool_type: pooling layer type. See pooling_layer's document. - :type pool_type: BasePoolingType - :param context_proj_layer_name: context projection layer name. - None if user don't care. - :type context_proj_layer_name: basestring - :param context_proj_param_attr: padding parameter attribute of context projection layer. - If false, it means padding always be zero. - :type context_proj_param_attr: ParameterAttribute|None - :param fc_layer_name: fc layer name. None if user don't care. - :type fc_layer_name: basestring - :param fc_param_attr: fc layer parameter attribute. None if user don't care. - :type fc_param_attr: ParameterAttribute|None - :param fc_bias_attr: fc bias parameter attribute. False if no bias, - None if user don't care. - :type fc_bias_attr: ParameterAttribute|False|None - :param fc_act: fc layer activation type. None means tanh. - :type fc_act: BaseActivation - :param pool_bias_attr: pooling layer bias attr. False if no bias. - None if user don't care. - :type pool_bias_attr: ParameterAttribute|False|None - :param fc_attr: fc layer extra attribute. - :type fc_attr: ExtraLayerAttribute - :param context_attr: context projection layer extra attribute. - :type context_attr: ExtraLayerAttribute - :param pool_attr: pooling layer extra attribute. - :type pool_attr: ExtraLayerAttribute - :return: layer's output. 
- :rtype: LayerOutput - """ - # Set Default Value to param - context_proj_layer_name = "%s_conv_proj" % name \ - if context_proj_layer_name is None else context_proj_layer_name - - with mixed_layer( - name=context_proj_layer_name, - size=input.size * context_len, - act=LinearActivation(), - layer_attr=context_attr) as m: - m += context_projection( - input, - context_len=context_len, - context_start=context_start, - padding_attr=context_proj_param_attr) - - fc_layer_name = "%s_conv_fc" % name \ - if fc_layer_name is None else fc_layer_name - fl = fc_layer( - name=fc_layer_name, - input=m, - size=hidden_size, - act=fc_act, - layer_attr=fc_attr, - param_attr=fc_param_attr, - bias_attr=fc_bias_attr) - - return pooling_layer( - name=name, - input=fl, - pooling_type=pool_type, - bias_attr=pool_bias_attr, - layer_attr=pool_attr) - - -text_conv_pool = sequence_conv_pool - -############################################################################ -# Images # -############################################################################ - - -@wrap_name_default("conv_pool") -def simple_img_conv_pool(input, - filter_size, - num_filters, - pool_size, - name=None, - pool_type=None, - act=None, - groups=1, - conv_stride=1, - conv_padding=0, - bias_attr=None, - num_channel=None, - param_attr=None, - shared_bias=True, - conv_layer_attr=None, - pool_stride=1, - pool_padding=0, - pool_layer_attr=None): - """ - Simple image convolution and pooling group. - - Img input => Conv => Pooling => Output. - - :param name: group name. - :type name: basestring - :param input: input layer. - :type input: LayerOutput - :param filter_size: see img_conv_layer for details. - :type filter_size: int - :param num_filters: see img_conv_layer for details. - :type num_filters: int - :param pool_size: see img_pool_layer for details. - :type pool_size: int - :param pool_type: see img_pool_layer for details. - :type pool_type: BasePoolingType - :param act: see img_conv_layer for details. - :type act: BaseActivation - :param groups: see img_conv_layer for details. - :type groups: int - :param conv_stride: see img_conv_layer for details. - :type conv_stride: int - :param conv_padding: see img_conv_layer for details. - :type conv_padding: int - :param bias_attr: see img_conv_layer for details. - :type bias_attr: ParameterAttribute - :param num_channel: see img_conv_layer for details. - :type num_channel: int - :param param_attr: see img_conv_layer for details. - :type param_attr: ParameterAttribute - :param shared_bias: see img_conv_layer for details. - :type shared_bias: bool - :param conv_layer_attr: see img_conv_layer for details. - :type conv_layer_attr: ExtraLayerAttribute - :param pool_stride: see img_pool_layer for details. - :type pool_stride: int - :param pool_padding: see img_pool_layer for details. - :type pool_padding: int - :param pool_layer_attr: see img_pool_layer for details. 
- :type pool_layer_attr: ExtraLayerAttribute - :return: layer's output - :rtype: LayerOutput - """ - _conv_ = img_conv_layer( - name="%s_conv" % name, - input=input, - filter_size=filter_size, - num_filters=num_filters, - num_channels=num_channel, - act=act, - groups=groups, - stride=conv_stride, - padding=conv_padding, - bias_attr=bias_attr, - param_attr=param_attr, - shared_biases=shared_bias, - layer_attr=conv_layer_attr) - return img_pool_layer( - name="%s_pool" % name, - input=_conv_, - pool_size=pool_size, - pool_type=pool_type, - stride=pool_stride, - padding=pool_padding, - layer_attr=pool_layer_attr) - - -@wrap_name_default("conv_bn_pool") -def img_conv_bn_pool(input, - filter_size, - num_filters, - pool_size, - name=None, - pool_type=None, - act=None, - groups=1, - conv_stride=1, - conv_padding=0, - conv_bias_attr=None, - num_channel=None, - conv_param_attr=None, - shared_bias=True, - conv_layer_attr=None, - bn_param_attr=None, - bn_bias_attr=None, - bn_layer_attr=None, - pool_stride=1, - pool_padding=0, - pool_layer_attr=None): - """ - Convolution, batch normalization, pooling group. - - Img input => Conv => BN => Pooling => Output. - - :param name: group name. - :type name: basestring - :param input: input layer. - :type input: LayerOutput - :param filter_size: see img_conv_layer for details. - :type filter_size: int - :param num_filters: see img_conv_layer for details. - :type num_filters: int - :param pool_size: see img_pool_layer for details. - :type pool_size: int - :param pool_type: see img_pool_layer for details. - :type pool_type: BasePoolingType - :param act: see batch_norm_layer for details. - :type act: BaseActivation - :param groups: see img_conv_layer for details. - :type groups: int - :param conv_stride: see img_conv_layer for details. - :type conv_stride: int - :param conv_padding: see img_conv_layer for details. - :type conv_padding: int - :param conv_bias_attr: see img_conv_layer for details. - :type conv_bias_attr: ParameterAttribute - :param num_channel: see img_conv_layer for details. - :type num_channel: int - :param conv_param_attr: see img_conv_layer for details. - :type conv_param_attr: ParameterAttribute - :param shared_bias: see img_conv_layer for details. - :type shared_bias: bool - :param conv_layer_attr: see img_conv_layer for details. - :type conv_layer_attr: ExtraLayerOutput - :param bn_param_attr: see batch_norm_layer for details. - :type bn_param_attr: ParameterAttribute - :param bn_bias_attr: see batch_norm_layer for details. - :type bn_bias_attr: ParameterAttribute - :param bn_layer_attr: see batch_norm_layer for details. - :type bn_layer_attr: ExtraLayerAttribute - :param pool_stride: see img_pool_layer for details. - :type pool_stride: int - :param pool_padding: see img_pool_layer for details. - :type pool_padding: int - :param pool_layer_attr: see img_pool_layer for details. 
- :type pool_layer_attr: ExtraLayerAttribute - :return: layer's output - :rtype: LayerOutput - """ - __conv__ = img_conv_layer( - name="%s_conv" % name, - input=input, - filter_size=filter_size, - num_filters=num_filters, - num_channels=num_channel, - act=LinearActivation(), - groups=groups, - stride=conv_stride, - padding=conv_padding, - bias_attr=conv_bias_attr, - param_attr=conv_param_attr, - shared_biases=shared_bias, - layer_attr=conv_layer_attr) - __bn__ = batch_norm_layer( - name="%s_bn" % name, - input=__conv__, - act=act, - bias_attr=bn_bias_attr, - param_attr=bn_param_attr, - layer_attr=bn_layer_attr) - return img_pool_layer( - name="%s_pool" % name, - input=__bn__, - pool_type=pool_type, - pool_size=pool_size, - stride=pool_stride, - padding=pool_padding, - layer_attr=pool_layer_attr) - - -@wrap_act_default(param_names=['conv_act'], act=ReluActivation()) -@wrap_param_default( - param_names=['pool_type'], default_factory=lambda _: MaxPooling()) -def img_conv_group(input, - conv_num_filter, - pool_size, - num_channels=None, - conv_padding=1, - conv_filter_size=3, - conv_act=None, - conv_with_batchnorm=False, - conv_batchnorm_drop_rate=0, - pool_stride=1, - pool_type=None, - param_attr=None): - """ - Image Convolution Group, Used for vgg net. - - :param conv_batchnorm_drop_rate: if conv_with_batchnorm[i] is true, - conv_batchnorm_drop_rate[i] represents the drop rate of each batch norm. - :type conv_batchnorm_drop_rate: list - :param input: input layer. - :type input: LayerOutput - :param conv_num_filter: list of output channels num. - :type conv_num_filter: list|tuple - :param pool_size: pooling filter size. - :type pool_size: int - :param num_channels: input channels num. - :type num_channels: int - :param conv_padding: convolution padding size. - :type conv_padding: int - :param conv_filter_size: convolution filter size. - :type conv_filter_size: int - :param conv_act: activation funciton after convolution. - :type conv_act: BaseActivation - :param conv_with_batchnorm: if conv_with_batchnorm[i] is true, - there is a batch normalization operation after each convolution. - :type conv_with_batchnorm: list - :param pool_stride: pooling stride size. - :type pool_stride: int - :param pool_type: pooling type. - :type pool_type: BasePoolingType - :param param_attr: param attribute of convolution layer, - None means default attribute. 
- :type param_attr: ParameterAttribute - :return: layer's output - :rtype: LayerOutput - """ - tmp = input - - # Type checks - assert isinstance(tmp, LayerOutput) - assert isinstance(conv_num_filter, list) or isinstance(conv_num_filter, - tuple) - for each_num_filter in conv_num_filter: - assert isinstance(each_num_filter, int) - - assert isinstance(pool_size, int) - - def __extend_list__(obj): - if not hasattr(obj, '__len__'): - return [obj] * len(conv_num_filter) - else: - return obj - - conv_padding = __extend_list__(conv_padding) - conv_filter_size = __extend_list__(conv_filter_size) - conv_act = __extend_list__(conv_act) - conv_with_batchnorm = __extend_list__(conv_with_batchnorm) - conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate) - - for i in xrange(len(conv_num_filter)): - extra_kwargs = dict() - if num_channels is not None: - extra_kwargs['num_channels'] = num_channels - num_channels = None - if conv_with_batchnorm[i]: - extra_kwargs['act'] = LinearActivation() - else: - extra_kwargs['act'] = conv_act[i] - - tmp = img_conv_layer( - input=tmp, - padding=conv_padding[i], - filter_size=conv_filter_size[i], - num_filters=conv_num_filter[i], - param_attr=param_attr, - **extra_kwargs) - - # logger.debug("tmp.num_filters = %d" % tmp.num_filters) - - if conv_with_batchnorm[i]: - dropout = conv_batchnorm_drop_rate[i] - if dropout == 0 or abs(dropout) < 1e-5: # dropout not set - tmp = batch_norm_layer(input=tmp, act=conv_act[i]) - else: - tmp = batch_norm_layer( - input=tmp, - act=conv_act[i], - layer_attr=ExtraAttr(drop_rate=dropout)) - - return img_pool_layer( - input=tmp, stride=pool_stride, pool_size=pool_size, pool_type=pool_type) - - -@wrap_name_default("separable_conv") -def img_separable_conv(input, - num_channels, - num_out_channels, - filter_size, - stride=1, - padding=0, - depth_multiplier=1, - act=None, - bias_attr=None, - param_attr=None, - shared_bias=True, - layer_type='exconv', - name=None): - """ - Separable Convolution. - - The separable convolution module is consisted of a depthwise convolution - that acts separately on input channels, followed by a pointwise convolution - with 1*1 kernels that mixes channels. It is used for Xception: - https://arxiv.org/pdf/1610.02357.pdf - - :param input: input layer. - :type input: LayerOutput - :param num_channels: the number of input channels. - :type num_channels: int - :param num_out_channels: the number of output channels. - :type num_out_channels: int - :param filter_size: the filter size for the depthwise convolution. - :type filter_size: int|tuple - :param stride: the stride size for the depthwise convolution. - :type stride: int|tuple - :param padding: the padding size for the depthwise convolution. - :type padding: int|tuple - :param depth_multiplier: the number of filter for one channel in the - depthwize convolution. - :type depth_multiplier: int - :param act: the activation function for the output. - :type act: BaseActivation - :param bias_attr: see img_conv_layer for details. - :type bias_attr: ParameterAttribute - :param param_attr: see img_conv_layer for details. - :type param_attr: ParameterAttribute - :param shared_bias: see img_conv_layer for details. - :type shared_bias: bool - :param layer_type: see img_conv_layer for details. 
- :type layer_type: bool - :return: layer's output - :rtype: LayerOutput - """ - __depthwise_conv__ = img_conv_layer( - name="%s_depthwise_conv" % name, - input=input, - num_channels=num_channels, - num_filters=num_channels * depth_multiplier, - groups=num_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - act=LinearActivation(), - bias_attr=bias_attr, - param_attr=param_attr, - shared_biases=shared_bias, - layer_type=layer_type) - __pointwise_conv__ = img_conv_layer( - name="%s_pointwise_conv" % name, - input=__depthwise_conv__, - num_channels=num_channels * depth_multiplier, - num_filters=num_out_channels, - filter_size=1, - stride=1, - padding=0, - act=act, - bias_attr=bias_attr, - param_attr=param_attr, - shared_biases=shared_bias) - return __pointwise_conv__ - - -def small_vgg(input_image, num_channels, num_classes): - def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None): - return img_conv_group( - input=ipt, - num_channels=num_channels_, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * times, - conv_filter_size=3, - conv_act=ReluActivation(), - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type=MaxPooling()) - - tmp = __vgg__(input_image, 64, 2, [0.3, 0], num_channels) - tmp = __vgg__(tmp, 128, 2, [0.4, 0]) - tmp = __vgg__(tmp, 256, 3, [0.4, 0.4, 0]) - tmp = __vgg__(tmp, 512, 3, [0.4, 0.4, 0]) - tmp = img_pool_layer( - input=tmp, stride=2, pool_size=2, pool_type=MaxPooling()) - tmp = dropout_layer(input=tmp, dropout_rate=0.5) - tmp = fc_layer( - input=tmp, - size=512, - layer_attr=ExtraAttr(drop_rate=0.5), - act=LinearActivation()) - tmp = batch_norm_layer(input=tmp, act=ReluActivation()) - return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation()) - - -def vgg_16_network(input_image, num_channels, num_classes=1000): - """ - Same model from https://gist.github.com/ksimonyan/211839e770f7b538e2d8 - - :param num_classes: number of class. - :type num_classes: int - :param input_image: input layer. - :type input_image: LayerOutput - :param num_channels: input channels num. 
- :type num_channels: int - :return: layer's output - :rtype: LayerOutput - """ - - tmp = img_conv_group( - input=input_image, - num_channels=num_channels, - conv_padding=1, - conv_num_filter=[64, 64], - conv_filter_size=3, - conv_act=ReluActivation(), - pool_size=2, - pool_stride=2, - pool_type=MaxPooling()) - - tmp = img_conv_group( - input=tmp, - conv_num_filter=[128, 128], - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - - tmp = img_conv_group( - input=tmp, - conv_num_filter=[256, 256, 256], - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - - tmp = img_conv_group( - input=tmp, - conv_num_filter=[512, 512, 512], - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - tmp = img_conv_group( - input=tmp, - conv_num_filter=[512, 512, 512], - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - - tmp = fc_layer( - input=tmp, - size=4096, - act=ReluActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) - - tmp = fc_layer( - input=tmp, - size=4096, - act=ReluActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) - - return fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation()) - - -############################################################################ -# Recurrent # -############################################################################ - - -@wrap_name_default("lstm") -def simple_lstm(input, - size, - name=None, - reverse=False, - mat_param_attr=None, - bias_param_attr=None, - inner_param_attr=None, - act=None, - gate_act=None, - state_act=None, - mixed_layer_attr=None, - lstm_cell_attr=None): - """ - Simple LSTM Cell. - - It just combines a mixed layer with fully_matrix_projection and a lstmemory - layer. The simple lstm cell was implemented with follow equations. - - .. math:: - - i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) - - f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) - - c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c) - - o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) - - h_t & = o_t tanh(c_t) - - Please refer to **Generating Sequences With Recurrent Neural Networks** for more - details about lstm. Link_ is here. - - .. _Link: http://arxiv.org/abs/1308.0850 - - :param name: lstm layer name. - :type name: basestring - :param input: layer's input. - :type input: LayerOutput - :param size: lstm layer size. - :type size: int - :param reverse: process the input in a reverse order or not. - :type reverse: bool - :param mat_param_attr: parameter attribute of matrix projection in mixed layer. - :type mat_param_attr: ParameterAttribute - :param bias_param_attr: bias parameter attribute. False means no bias, None - means default bias. - :type bias_param_attr: ParameterAttribute|False - :param inner_param_attr: parameter attribute of lstm cell. - :type inner_param_attr: ParameterAttribute - :param act: last activiation type of lstm. - :type act: BaseActivation - :param gate_act: gate activiation type of lstm. - :type gate_act: BaseActivation - :param state_act: state activiation type of lstm. - :type state_act: BaseActivation - :param mixed_layer_attr: extra attribute of mixed layer. - :type mixed_layer_attr: ExtraLayerAttribute - :param lstm_cell_attr: extra attribute of lstm. 
- :type lstm_cell_attr: ExtraLayerAttribute - :return: layer's output. - :rtype: LayerOutput - """ - fc_name = 'lstm_transform_%s' % name - with mixed_layer( - name=fc_name, - size=size * 4, - act=IdentityActivation(), - layer_attr=mixed_layer_attr, - bias_attr=False) as m: - m += full_matrix_projection(input, param_attr=mat_param_attr) - - return lstmemory( - name=name, - input=m, - reverse=reverse, - bias_attr=bias_param_attr, - param_attr=inner_param_attr, - act=act, - gate_act=gate_act, - state_act=state_act, - layer_attr=lstm_cell_attr) - - -@wrap_name_default('lstm_unit') -def lstmemory_unit(input, - out_memory=None, - name=None, - size=None, - param_attr=None, - act=None, - gate_act=None, - state_act=None, - input_proj_bias_attr=None, - input_proj_layer_attr=None, - lstm_bias_attr=None, - lstm_layer_attr=None): - """ - lstmemory_unit defines the caculation process of a LSTM unit during a - single time step. This function is not a recurrent layer, so it can not be - directly used to process sequence input. This function is always used in - recurrent_group (see layers.py for more details) to implement attention - mechanism. - - Please refer to **Generating Sequences With Recurrent Neural Networks** - for more details about LSTM. The link goes as follows: - .. _Link: https://arxiv.org/abs/1308.0850 - - .. math:: - - i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i) - - f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f) - - c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c) - - o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o) - - h_t & = o_t tanh(c_t) - - The example usage is: - - .. code-block:: python - - lstm_step = lstmemory_unit(input=[layer1], - size=256, - act=TanhActivation(), - gate_act=SigmoidActivation(), - state_act=TanhActivation()) - - - :param input: Input layer. - :type input: LayerOutput - :param out_memory: The output of previous time step. - :type out_memory: LayerOutput | None - :param name: The lstmemory unit name. - :type name: basestring - :param size: The lstmemory unit size. - :type size: int - :param param_attr: The parameter attribute for the weights in - input to hidden projection. - None means default attribute. - :type param_attr: ParameterAttribute - :param act: The last activiation type of lstm. - :type act: BaseActivation - :param gate_act: The gate activiation type of lstm. - :type gate_act: BaseActivation - :param state_act: The state activiation type of lstm. - :type state_act: BaseActivation - :param input_proj_bias_attr: The parameter attribute for the bias in - input to hidden projection. - False or None means no bias. - If this parameter is set to True, - the bias is initialized to zero. - :type input_proj_bias_attr: ParameterAttribute|bool|None - :param input_proj_layer_attr: The extra layer attribute for - input to hidden projection of the LSTM unit, - such as dropout, error clipping. - :type input_proj_layer_attr: ExtraLayerAttribute - :param lstm_bias_attr: The parameter attribute for the bias in lstm layer. - False or None means no bias. - If this parameter is set to True, - the bias is initialized to zero. - :type lstm_bias_attr: ParameterAttribute|True|None - :param lstm_layer_attr: The extra attribute of lstm layer. - :type lstm_layer_attr: ExtraLayerAttribute - :return: The lstmemory unit name. 
- :rtype: LayerOutput - """ - if size is None: - assert input.size % 4 == 0 - size = input.size / 4 - if out_memory is None: - out_mem = memory(name=name, size=size) - else: - out_mem = out_memory - - state_mem = memory(name="%s_state" % name, size=size) - - with mixed_layer( - name="%s_input_recurrent" % name, - size=size * 4, - bias_attr=input_proj_bias_attr, - layer_attr=input_proj_layer_attr, - act=IdentityActivation()) as m: - m += identity_projection(input=input) - m += full_matrix_projection(input=out_mem, param_attr=param_attr) - - lstm_out = lstm_step_layer( - name=name, - input=m, - state=state_mem, - size=size, - bias_attr=lstm_bias_attr, - act=act, - gate_act=gate_act, - state_act=state_act, - layer_attr=lstm_layer_attr) - get_output_layer(name='%s_state' % name, input=lstm_out, arg_name='state') - - return lstm_out - - -@wrap_name_default('lstm_group') -def lstmemory_group(input, - size=None, - name=None, - out_memory=None, - reverse=False, - param_attr=None, - act=None, - gate_act=None, - state_act=None, - input_proj_bias_attr=None, - input_proj_layer_attr=None, - lstm_bias_attr=None, - lstm_layer_attr=None): - """ - lstm_group is a recurrent_group version of Long Short Term Memory. It - does exactly the same calculation as the lstmemory layer (see lstmemory in - layers.py for the maths) does. A promising benefit is that LSTM memory - cell states(or hidden states) in every time step are accessible to the - user. This is especially useful in attention model. If you do not need to - access the internal states of the lstm and merely use its outputs, - it is recommended to use the lstmemory, which is relatively faster than - lstmemory_group. - - NOTE: In PaddlePaddle's implementation, the following input-to-hidden - multiplications: - :math:`W_{x_i}x_{t}` , :math:`W_{x_f}x_{t}`, - :math:`W_{x_c}x_t`, :math:`W_{x_o}x_{t}` are not done in lstmemory_unit to - speed up the calculations. Consequently, an additional mixed_layer with - full_matrix_projection must be included before lstmemory_unit is called. - - The example usage is: - - .. code-block:: python - - lstm_step = lstmemory_group(input=[layer1], - size=256, - act=TanhActivation(), - gate_act=SigmoidActivation(), - state_act=TanhActivation()) - - :param input: Input layer. - :type input: LayerOutput - :param size: The lstmemory group size. - :type size: int - :param name: The name of lstmemory group. - :type name: basestring - :param out_memory: The output of previous time step. - :type out_memory: LayerOutput | None - :param reverse: Process the input in a reverse order or not. - :type reverse: bool - :param param_attr: The parameter attribute for the weights in - input to hidden projection. - None means default attribute. - :type param_attr: ParameterAttribute - :param act: The last activiation type of lstm. - :type act: BaseActivation - :param gate_act: The gate activiation type of lstm. - :type gate_act: BaseActivation - :param state_act: The state activiation type of lstm. - :type state_act: BaseActivation - :param input_proj_bias_attr: The parameter attribute for the bias in - input to hidden projection. - False or None means no bias. - If this parameter is set to True, - the bias is initialized to zero. - :type input_proj_bias_attr: ParameterAttribute|bool|None - :param input_proj_layer_attr: The extra layer attribute for - input to hidden projection of the LSTM unit, - such as dropout, error clipping. 
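For intuition, what recurrent_group does with such a step function can be modeled as a plain Python fold over the time steps (a conceptual sketch only; the real layer builds a static graph rather than looping eagerly, and `apply_recurrent` is a hypothetical name):

.. code-block:: python

    def apply_recurrent(step, xs, init_state, reverse=False):
        """Conceptual model of recurrent_group: fold `step` over time steps."""
        state = init_state
        outputs = []
        for x in (reversed(xs) if reverse else xs):
            out, state = step(x, state)
            outputs.append(out)
        # Outputs of a reversed scan are re-reversed to align with input order.
        return outputs[::-1] if reverse else outputs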
- :type input_proj_layer_attr: ExtraLayerAttribute - :param lstm_bias_attr: The parameter attribute for the bias in lstm layer. - False or None means no bias. - If this parameter is set to True, - the bias is initialized to zero. - :type lstm_bias_attr: ParameterAttribute|True|None - :param lstm_layer_attr: The extra attribute of lstm layer. - :type lstm_layer_attr: ExtraLayerAttribute - :return: the lstmemory group. - :rtype: LayerOutput - """ - - def __lstm_step__(ipt): - return lstmemory_unit( - input=ipt, - name=name, - size=size, - act=act, - gate_act=gate_act, - state_act=state_act, - out_memory=out_memory, - input_proj_bias_attr=input_proj_bias_attr, - input_proj_layer_attr=input_proj_layer_attr, - param_attr=param_attr, - lstm_layer_attr=lstm_layer_attr, - lstm_bias_attr=lstm_bias_attr) - - return recurrent_group( - name='%s_recurrent_group' % name, - step=__lstm_step__, - reverse=reverse, - input=input) - - -@wrap_name_default('gru_unit') -def gru_unit(input, - memory_boot=None, - size=None, - name=None, - gru_bias_attr=None, - gru_param_attr=None, - act=None, - gate_act=None, - gru_layer_attr=None, - naive=False): - """ - gru_unit defines the calculation process of a gated recurrent unit during a single - time step. This function is not a recurrent layer, so it can not be - directly used to process sequence input. This function is always used in - the recurrent_group (see layers.py for more details) to implement attention - mechanism. - - Please see grumemory in layers.py for the details about the maths. - - :param input: input layer. - :type input: LayerOutput - :param memory_boot: the initialization state of the LSTM cell. - :type memory_boot: LayerOutput | None - :param name: name of the gru group. - :type name: basestring - :param size: hidden size of the gru. - :type size: int - :param act: activation type of gru - :type act: BaseActivation - :param gate_act: gate activation type or gru - :type gate_act: BaseActivation - :param gru_layer_attr: Extra attribute of the gru layer. - :type gru_layer_attr: ExtraLayerAttribute - :return: the gru output layer. - :rtype: LayerOutput - """ - - assert input.size % 3 == 0 - if size is None: - size = input.size / 3 - - out_mem = memory(name=name, size=size, boot_layer=memory_boot) - - if naive: - __step__ = gru_step_naive_layer - else: - __step__ = gru_step_layer - - gru_out = __step__( - name=name, - input=input, - output_mem=out_mem, - size=size, - bias_attr=gru_bias_attr, - param_attr=gru_param_attr, - act=act, - gate_act=gate_act, - layer_attr=gru_layer_attr) - return gru_out - - -@wrap_name_default('gru_group') -def gru_group(input, - memory_boot=None, - size=None, - name=None, - reverse=False, - gru_bias_attr=None, - gru_param_attr=None, - act=None, - gate_act=None, - gru_layer_attr=None, - naive=False): - """ - gru_group is a recurrent_group version of Gated Recurrent Unit. It - does exactly the same calculation as the grumemory layer does. A promising - benefit is that gru hidden states are accessible to the user. This is - especially useful in attention model. If you do not need to access - any internal state and merely use the outputs of a GRU, it is recommended - to use the grumemory, which is relatively faster. - - Please see grumemory in layers.py for more detail about the maths. - - The example usage is: - - .. code-block:: python - - gru = gru_group(input=[layer1], - size=256, - act=TanhActivation(), - gate_act=SigmoidActivation()) - - :param input: input layer. 
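A NumPy sketch of one GRU step in the update/reset-gate formulation referenced here (an illustration under the standard GRU equations; the exact Paddle weight layout may differ). The three input projections arrive pre-computed, mirroring the `input.size % 3 == 0` assertion in gru_unit above:

.. code-block:: python

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def gru_step(xz, xr, xc, h_prev, U_z, U_r, U_c):
        # xz, xr, xc are the pre-computed input projections W_z x, W_r x, W_c x.
        z = sigmoid(xz + h_prev @ U_z)            # update gate
        r = sigmoid(xr + h_prev @ U_r)            # reset gate
        h_tilde = np.tanh(xc + (r * h_prev) @ U_c)  # candidate state
        return (1.0 - z) * h_prev + z * h_tilde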
-    :type input: LayerOutput
-    :param memory_boot: the initialization state of the GRU cell.
-    :type memory_boot: LayerOutput | None
-    :param name: name of the gru group.
-    :type name: basestring
-    :param size: hidden size of the gru.
-    :type size: int
-    :param reverse: process the input in a reverse order or not.
-    :type reverse: bool
-    :param act: activation type of gru
-    :type act: BaseActivation
-    :param gate_act: gate activation type of gru
-    :type gate_act: BaseActivation
-    :param gru_bias_attr: bias parameter attribute of gru layer,
-                          False means no bias, None means default bias.
-    :type gru_bias_attr: ParameterAttribute|False|None
-    :param gru_layer_attr: Extra attribute of the gru layer.
-    :type gru_layer_attr: ExtraLayerAttribute
-    :return: the gru group.
-    :rtype: LayerOutput
-    """
-
-    def __gru_step__(ipt):
-        return gru_unit(
-            input=ipt,
-            memory_boot=memory_boot,
-            name=name,
-            size=size,
-            gru_bias_attr=gru_bias_attr,
-            gru_param_attr=gru_param_attr,
-            act=act,
-            gate_act=gate_act,
-            gru_layer_attr=gru_layer_attr,
-            naive=naive)
-
-    return recurrent_group(
-        name='%s_recurrent_group' % name,
-        step=__gru_step__,
-        reverse=reverse,
-        input=input)
-
-
-@wrap_name_default('simple_gru')
-def simple_gru(input,
-               size,
-               name=None,
-               reverse=False,
-               mixed_param_attr=None,
-               mixed_bias_param_attr=None,
-               mixed_layer_attr=None,
-               gru_bias_attr=None,
-               gru_param_attr=None,
-               act=None,
-               gate_act=None,
-               gru_layer_attr=None,
-               naive=False):
-    """
-    You may see gru_step_layer and grumemory in layers.py, and gru_unit,
-    gru_group and simple_gru in networks.py. The reason why there are so many
-    interfaces is that we have two ways to implement recurrent neural
-    networks. One way is to use one complete layer to implement the rnn
-    (including simple rnn, gru and lstm) with multiple time steps, such as
-    recurrent_layer, lstmemory, grumemory. But the multiplication operation
-    :math:`W x_t` is not computed in these layers. See details in their
-    interfaces in layers.py.
-    The other implementation is to use a recurrent group, which can assemble
-    a series of layers to compute the rnn step by step. This way is flexible
-    for attention mechanisms or other complex connections.
-
-    - gru_step_layer: computes the rnn for one step only. It needs a memory
-      as input and can be used in a recurrent group.
-    - gru_unit: a wrapper of gru_step_layer with memory.
-    - gru_group: a GRU cell implemented by a combination of multiple layers
-      in a recurrent group. But :math:`W x_t` is not done in the group.
-    - grumemory: a GRU cell implemented by one layer, which does the same
-      calculation as gru_group and is faster than gru_group.
-    - simple_gru: a complete GRU implementation including :math:`W x_t` and
-      gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`,
-      see the formula in grumemory.
-
-    In terms of speed, grumemory is faster than gru_group, and gru_group is
-    faster than simple_gru.
-
-    The example usage is:
-
-    .. code-block:: python
-
-        gru = simple_gru(input=[layer1], size=256)
-
-    :param input: input layer.
-    :type input: LayerOutput
-    :param name: name of the gru group.
-    :type name: basestring
-    :param size: hidden size of the gru.
-    :type size: int
-    :param reverse: process the input in a reverse order or not.
- :type reverse: bool - :param act: activiation type of gru - :type act: BaseActivation - :param gate_act: gate activiation type of gru - :type gate_act: BaseActivation - :param gru_bias_attr: bias parameter attribute of gru layer, - False means no bias, None means default bias. - :type gru_bias_attr: ParameterAttribute|False|None - :param gru_layer_attr: Extra attribute of the gru layer. - :type gru_layer_attr: ExtraLayerAttribute - :return: the gru group. - :rtype: LayerOutput - """ - with mixed_layer( - name='%s_transform' % name, - size=size * 3, - bias_attr=mixed_bias_param_attr, - layer_attr=mixed_layer_attr) as m: - m += full_matrix_projection(input=input, param_attr=mixed_param_attr) - - return gru_group( - name=name, - size=size, - input=m, - reverse=reverse, - gru_bias_attr=gru_bias_attr, - gru_param_attr=gru_param_attr, - act=act, - gate_act=gate_act, - gru_layer_attr=gru_layer_attr, - naive=naive) - - -@wrap_name_default('simple_gru2') -def simple_gru2(input, - size, - name=None, - reverse=False, - mixed_param_attr=None, - mixed_bias_attr=None, - gru_param_attr=None, - gru_bias_attr=None, - act=None, - gate_act=None, - mixed_layer_attr=None, - gru_cell_attr=None): - """ - simple_gru2 is the same with simple_gru, but using grumemory instead. - Please refer to grumemory in layers.py for more detail about the math. - simple_gru2 is faster than simple_gru. - - The example usage is: - - .. code-block:: python - - gru = simple_gru2(input=[layer1], size=256) - - :param input: input layer. - :type input: LayerOutput - :param name: name of the gru group. - :type name: basestring - :param size: hidden size of the gru. - :type size: int - :param reverse: process the input in a reverse order or not. - :type reverse: bool - :param act: activiation type of gru - :type act: BaseActivation - :param gate_act: gate activiation type of gru - :type gate_act: BaseActivation - :param gru_bias_attr: bias parameter attribute of gru layer, - False means no bias, None means default bias. - :type gru_bias_attr: ParameterAttribute|False|None - :param gru_param_attr: param parameter attribute of gru layer, - None means default param. - :type gru_param_attr: ParameterAttribute|None - :return: the gru group. - :rtype: LayerOutput - """ - with mixed_layer( - name='%s_transform' % name, - size=size * 3, - bias_attr=mixed_bias_attr, - layer_attr=mixed_layer_attr) as m: - m += full_matrix_projection(input=input, param_attr=mixed_param_attr) - - return grumemory( - name=name, - input=m, - reverse=reverse, - bias_attr=gru_bias_attr, - param_attr=gru_param_attr, - act=act, - gate_act=gate_act, - layer_attr=gru_cell_attr) - - -@wrap_name_default("bidirectional_gru") -def bidirectional_gru(input, - size, - name=None, - return_seq=False, - fwd_mixed_param_attr=None, - fwd_mixed_bias_attr=None, - fwd_gru_param_attr=None, - fwd_gru_bias_attr=None, - fwd_act=None, - fwd_gate_act=None, - fwd_mixed_layer_attr=None, - fwd_gru_cell_attr=None, - bwd_mixed_param_attr=None, - bwd_mixed_bias_attr=None, - bwd_gru_param_attr=None, - bwd_gru_bias_attr=None, - bwd_act=None, - bwd_gate_act=None, - bwd_mixed_layer_attr=None, - bwd_gru_cell_attr=None, - last_seq_attr=None, - first_seq_attr=None, - concat_attr=None, - concat_act=None): - """ - A bidirectional_gru is a recurrent unit that iterates over the input - sequence both in forward and backward orders, and then concatenate two - outputs to form a final output. 
However, concatenation of two outputs - is not the only way to form the final output, you can also, for example, - just add them together. - - The example usage is: - - .. code-block:: python - - bi_gru = bidirectional_gru(input=[input1], size=512) - - :param name: bidirectional gru layer name. - :type name: basestring - :param input: input layer. - :type input: LayerOutput - :param size: gru layer size. - :type size: int - :param return_seq: If set False, the last time step of output are - concatenated and returned. - If set True, the entire output sequences in forward - and backward directions are concatenated and returned. - :type return_seq: bool - :return: LayerOutput object. - :rtype: LayerOutput - """ - args = locals() - - fw = simple_gru2( - name='%s_fw' % name, - input=input, - size=size, - **dict((k[len('fwd_'):], v) for k, v in args.iteritems() - if k.startswith('fwd_'))) - - bw = simple_gru2( - name="%s_bw" % name, - input=input, - size=size, - reverse=True, - **dict((k[len('bwd_'):], v) for k, v in args.iteritems() - if k.startswith('bwd_'))) - - if return_seq: - return concat_layer( - name=name, input=[fw, bw], layer_attr=concat_attr, act=concat_act) - else: - fw_seq = last_seq( - name="%s_fw_last" % name, input=fw, layer_attr=last_seq_attr) - bw_seq = first_seq( - name="%s_bw_last" % name, input=bw, layer_attr=first_seq_attr) - return concat_layer( - name=name, - input=[fw_seq, bw_seq], - layer_attr=concat_attr, - act=concat_act) - - -@wrap_name_default("bidirectional_lstm") -def bidirectional_lstm(input, - size, - name=None, - return_seq=False, - fwd_mat_param_attr=None, - fwd_bias_param_attr=None, - fwd_inner_param_attr=None, - fwd_act=None, - fwd_gate_act=None, - fwd_state_act=None, - fwd_mixed_layer_attr=None, - fwd_lstm_cell_attr=None, - bwd_mat_param_attr=None, - bwd_bias_param_attr=None, - bwd_inner_param_attr=None, - bwd_act=None, - bwd_gate_act=None, - bwd_state_act=None, - bwd_mixed_layer_attr=None, - bwd_lstm_cell_attr=None, - last_seq_attr=None, - first_seq_attr=None, - concat_attr=None, - concat_act=None): - """ - A bidirectional_lstm is a recurrent unit that iterates over the input - sequence both in forward and backward orders, and then concatenate two - outputs to form a final output. However, concatenation of two outputs - is not the only way to form the final output, you can also, for example, - just add them together. - - Please refer to **Neural Machine Translation by Jointly Learning to Align - and Translate** for more details about the bidirectional lstm. - The link goes as follows: - .. _Link: https://arxiv.org/pdf/1409.0473v3.pdf - - The example usage is: - - .. code-block:: python - - bi_lstm = bidirectional_lstm(input=[input1], size=512) - - :param name: bidirectional lstm layer name. - :type name: basestring - :param input: input layer. - :type input: LayerOutput - :param size: lstm layer size. - :type size: int - :param return_seq: If set False, the last time step of output are - concatenated and returned. - If set True, the entire output sequences in forward - and backward directions are concatenated and returned. - :type return_seq: bool - :return: LayerOutput object. 
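The forward/backward wiring above can be sketched as follows, reusing the hypothetical `apply_recurrent` helper from the earlier sketch (a conceptual illustration, not the Paddle graph construction):

.. code-block:: python

    import numpy as np

    def bidirectional_encode(step, xs, init, return_seq=False):
        fw = apply_recurrent(step, xs, init)                 # forward pass
        bw = apply_recurrent(step, xs, init, reverse=True)   # backward pass
        if return_seq:
            # Concatenate the per-time-step outputs of both directions.
            return [np.concatenate([f, b]) for f, b in zip(fw, bw)]
        # Otherwise join the last forward output with the first backward
        # output, matching the last_seq/first_seq combination above.
        return np.concatenate([fw[-1], bw[0]])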
- :rtype: LayerOutput - """ - args = locals() - - fw = simple_lstm( - name='%s_fw' % name, - input=input, - size=size, - **dict((k[len('fwd_'):], v) for k, v in args.iteritems() - if k.startswith('fwd_'))) - - bw = simple_lstm( - name="%s_bw" % name, - input=input, - size=size, - reverse=True, - **dict((k[len('bwd_'):], v) for k, v in args.iteritems() - if k.startswith('bwd_'))) - - if return_seq: - return concat_layer( - name=name, input=[fw, bw], layer_attr=concat_attr, act=concat_act) - else: - fw_seq = last_seq( - name="%s_fw_last" % name, input=fw, layer_attr=last_seq_attr) - bw_seq = first_seq( - name="%s_bw_last" % name, input=bw, layer_attr=first_seq_attr) - return concat_layer( - name=name, - input=[fw_seq, bw_seq], - layer_attr=concat_attr, - act=concat_act) - - -@wrap_name_default() -@wrap_act_default(param_names=['weight_act'], act=TanhActivation()) -def simple_attention(encoded_sequence, - encoded_proj, - decoder_state, - transform_param_attr=None, - softmax_param_attr=None, - weight_act=None, - name=None): - """ - Calculate and return a context vector with attention mechanism. - Size of the context vector equals to size of the encoded_sequence. - - .. math:: - - a(s_{i-1},h_{j}) & = v_{a}f(W_{a}s_{t-1} + U_{a}h_{j}) - - e_{i,j} & = a(s_{i-1}, h_{j}) - - a_{i,j} & = \\frac{exp(e_{i,j})}{\\sum_{k=1}^{T_x}{exp(e_{i,k})}} - - c_{i} & = \\sum_{j=1}^{T_{x}}a_{i,j}h_{j} - - where :math:`h_{j}` is the jth element of encoded_sequence, - :math:`U_{a}h_{j}` is the jth element of encoded_proj - :math:`s_{i-1}` is decoder_state - :math:`f` is weight_act, and is set to tanh by default. - - Please refer to **Neural Machine Translation by Jointly Learning to - Align and Translate** for more details. The link is as follows: - https://arxiv.org/abs/1409.0473. - - The example usage is: - - .. code-block:: python - - context = simple_attention(encoded_sequence=enc_seq, - encoded_proj=enc_proj, - decoder_state=decoder_prev,) - - :param name: name of the attention model. - :type name: basestring - :param softmax_param_attr: parameter attribute of sequence softmax - that is used to produce attention weight. - :type softmax_param_attr: ParameterAttribute - :param weight_act: activation of the attention model. - :type weight_act: BaseActivation - :param encoded_sequence: output of the encoder - :type encoded_sequence: LayerOutput - :param encoded_proj: attention weight is computed by a feed forward neural - network which has two inputs : decoder's hidden state - of previous time step and encoder's output. - encoded_proj is output of the feed-forward network for - encoder's output. Here we pre-compute it outside - simple_attention for speed consideration. - :type encoded_proj: LayerOutput - :param decoder_state: hidden state of decoder in previous time step - :type decoder_state: LayerOutput - :param transform_param_attr: parameter attribute of the feed-forward - network that takes decoder_state as inputs to - compute attention weight. 
- :type transform_param_attr: ParameterAttribute - :return: a context vector - :rtype: LayerOutput - """ - assert encoded_proj.size == decoder_state.size - proj_size = encoded_proj.size - - with mixed_layer(size=proj_size, name="%s_transform" % name) as m: - m += full_matrix_projection( - decoder_state, param_attr=transform_param_attr) - - expanded = expand_layer( - input=m, expand_as=encoded_sequence, name='%s_expand' % name) - - with mixed_layer( - size=proj_size, act=weight_act, name="%s_combine" % name) as m: - m += identity_projection(expanded) - m += identity_projection(encoded_proj) - - # sequence softmax is used to normalize similarities between decoder state - # and encoder outputs into a distribution - attention_weight = fc_layer( - input=m, - size=1, - act=SequenceSoftmaxActivation(), - param_attr=softmax_param_attr, - name="%s_softmax" % name, - bias_attr=False) - - scaled = scaling_layer( - weight=attention_weight, - input=encoded_sequence, - name='%s_scaling' % name) - - return pooling_layer( - input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) - - -@wrap_name_default() -def dot_product_attention(encoded_sequence, - attended_sequence, - transformed_state, - softmax_param_attr=None, - name=None): - """ - Calculate and return a context vector with dot-product attention mechanism. - The dimension of the context vector equals to that of the attended_sequence. - - .. math:: - - a(s_{i-1},h_{j}) & = s_{i-1}^\mathrm{T} h_{j} - - e_{i,j} & = a(s_{i-1}, h_{j}) - - a_{i,j} & = \\frac{exp(e_{i,j})}{\\sum_{k=1}^{T_x}{exp(e_{i,k})}} - - c_{i} & = \\sum_{j=1}^{T_{x}}a_{i,j}z_{j} - - where :math:`h_{j}` is the jth element of encoded_sequence, - :math:`z_{j}` is the jth element of attended_sequence, - :math:`s_{i-1}` is transformed_state. - - The example usage is: - - .. code-block:: python - - context = dot_product_attention(encoded_sequence=enc_seq, - attended_sequence=att_seq, - transformed_state=state,) - - :param name: A prefix attached to the name of each layer that defined inside - the dot_product_attention. - :type name: basestring - :param softmax_param_attr: The parameter attribute of sequence softmax - that is used to produce attention weight. - :type softmax_param_attr: ParameterAttribute - :param encoded_sequence: The output hidden vectors of the encoder. - :type encoded_sequence: LayerOutput - :param attended_sequence: The attention weight is computed by a feed forward neural - network which has two inputs : decoder's transformed hidden - state of previous time step and encoder's output. - attended_sequence is the sequence to be attended. - :type attended_sequence: LayerOutput - :param transformed_state: The transformed hidden state of decoder in previous time step. - Since the dot-product operation will be performed on it and the - encoded_sequence, their dimensions must be equal. For flexibility, - we suppose transformations of the decoder's hidden state have been - done outside dot_product_attention and no more will be performed - inside. Then users can use either the original or transformed one. - :type transformed_state: LayerOutput - :return: The context vector. 
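A NumPy sketch of the additive attention computed by simple_attention above (hypothetical helper; shapes are assumed as noted in the comments):

.. code-block:: python

    import numpy as np

    def simple_attention_np(enc_seq, enc_proj, dec_state, W, v):
        # Shapes assumed: enc_seq (T, d), enc_proj (T, p), dec_state (s,),
        # W (s, p), v (p,). Scores e_j = v . tanh(W s_{i-1} + U h_j), where
        # U h_j is the pre-computed enc_proj, as in the docstring above.
        scores = np.tanh(dec_state @ W + enc_proj) @ v
        weights = np.exp(scores - scores.max())
        weights /= weights.sum()          # sequence softmax over time steps
        # Context vector: attention-weighted sum of the encoder outputs.
        return weights @ enc_seq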
- :rtype: LayerOutput - """ - assert transformed_state.size == encoded_sequence.size - - expanded = expand_layer( - input=transformed_state, - expand_as=encoded_sequence, - name='%s_expand' % name) - - m = dot_prod_layer( - input1=expanded, input2=encoded_sequence, name='%s_dot-product' % name) - - attention_weight = fc_layer( - input=m, - size=1, - act=SequenceSoftmaxActivation(), - param_attr=softmax_param_attr, - name="%s_softmax" % name, - bias_attr=False) - - scaled = scaling_layer( - weight=attention_weight, - input=attended_sequence, - name='%s_scaling' % name) - - return pooling_layer( - input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) - - -@wrap_name_default() -def multi_head_attention(query, - key, - value, - key_proj_size, - value_proj_size, - head_num, - attention_type, - softmax_param_attr=None, - name=None): - """ - Calculate and return a context vector with dot-product attention mechanism. - The dimension of the context vector equals to value_proj_size * head_num. - - Please refer to **Attention Is All You Need** for more details. The link is - as follows: - https://arxiv.org/abs/1706.03762. - - The example usage is: - - .. code-block:: python - - context = multi_head_attention(query=decoder_state, - key=enc_seq, - value=enc_seq, - key_proj_size=64, - value_pro_size=64, - head_num=8, - attention_type='dot-product attention') - - :param name: A prefix attached to the name of each layer that defined inside - the multi_head_attention. - :type name: basestring - :param softmax_param_attr: The parameter attribute of sequence softmax - that is used to produce attention weight. - :type softmax_param_attr: ParameterAttribute - :param query: query is used to calculate attention weights over values at current step. - :type query: LayerOutput - :param key: key is used to calculate the attention weight of the corresponding value. - :type key: LayerOutput - :param value: value is the sequence to be attended. - :type value: LayerOutput - :param key_proj_size: The dimension of the linear projection performed on key and query. - :type key_proj_size: int - :param value_proj_size: The dimension of the linear projection performed on value. - :type value_proj_size: int - :param head_num: The number of attention heads. - :type head_num: int - :param attention_type: The type of the attention mechanism used in each attention - heads. Now, we only support scaled dot-product attention and - additive attention. - :type attention_type: basestring - :return: The context vector. 
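The dot-product variant above reduces to a softmax-weighted sum; a minimal NumPy sketch (hypothetical helper):

.. code-block:: python

    import numpy as np

    def dot_product_attention_np(enc_seq, att_seq, state):
        scores = enc_seq @ state          # e_j = s^T h_j
        weights = np.exp(scores - scores.max())
        weights /= weights.sum()          # sequence softmax over time steps
        # Context: weighted sum over the *attended* sequence, as in c_i above.
        return weights @ att_seq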
- :rtype: LayerOutput - """ - assert attention_type in ['dot-product attention', 'additive attention'] - - with mixed_layer( - size=key_proj_size * head_num, - name='%s_query_proj' % name) as query_proj: - query_proj += full_matrix_projection(query) - query_proj = expand_layer(input=query_proj, expand_as=key) - - with mixed_layer( - size=key_proj_size * head_num, - name='%s_key_proj' % name) as key_proj: - key_proj += full_matrix_projection(key) - - with mixed_layer( - size=value_proj_size * head_num, - name='%s_value_proj' % name) as value_proj: - value_proj += full_matrix_projection(value) - - head_list = [] - for i in range(head_num): - with mixed_layer(size=key_proj_size) as sub_query_proj: - sub_query_proj += identity_projection( - query_proj, offset=key_proj_size * i, size=key_proj_size) - - with mixed_layer(size=key_proj_size) as sub_key_proj: - sub_key_proj += identity_projection( - key_proj, offset=key_proj_size * i, size=key_proj_size) - - with mixed_layer(size=value_proj_size) as sub_value_proj: - sub_value_proj += identity_projection( - value_proj, offset=value_proj_size * i, size=value_proj_size) - - if attention_type == 'dot-product attention': - m = dot_prod_layer( - input1=sub_query_proj, - input2=sub_key_proj, - name='%s_dot-product_%d' % (name, i)) - m = slope_intercept_layer( - input=m, - slope=math.sqrt(1.0 / key_proj_size), - name='%s_dot-product_scaling_%d' % (name, i)) - else: - with mixed_layer( - size=key_proj_size, - act=TanhActivation(), - name='%s_combine_%d' % (name, i)) as m: - m += identity_projection(sub_query_proj) - m += identity_projection(sub_key_proj) - - attention_weight = fc_layer( - input=m, - size=1, - act=SequenceSoftmaxActivation(), - param_attr=softmax_param_attr, - name="%s_softmax_%d" % (name, i), - bias_attr=False) - - scaled = scaling_layer( - weight=attention_weight, - input=sub_value_proj, - name='%s_scaling_%d' % (name, i)) - head = pooling_layer( - input=scaled, - pooling_type=SumPooling(), - name="%s_pooling_%d" % (name, i)) - - head_list.append(head) - - attended = concat_layer(head_list) - - return attended - - -def inputs(layers, *args): - """ - Declare the inputs of network. The order of input should be as same as - the data provider's return order. - - :param layers: Input Layers. - :type layers: list|tuple|LayerOutput. - :return: - """ - - if isinstance(layers, LayerOutput) or isinstance(layers, basestring): - layers = [layers] - if len(args) != 0: - layers.extend(args) - - Inputs(*[l.name for l in layers]) - - -def outputs(layers, *args): - """ - Declare the outputs of network. If user has not defined the inputs of - network, this method will calculate the input order by dfs travel. - - :param layers: Output layers. - :type layers: list|tuple|LayerOutput - :return: - """ - - traveled = set() - - def __dfs_travel__(layer, - predicate=lambda x: x.layer_type == LayerType.DATA): - """ - DFS LRV Travel for output layer. - - The return order is define order for data_layer in this leaf node. 
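A compact NumPy sketch of the per-head scaled dot-product path implemented above (hypothetical helper; the linear projections and the additive-attention branch are omitted):

.. code-block:: python

    import numpy as np

    def multi_head_np(q, K, V, head_num):
        # q: (d_k*H,), K: (T, d_k*H), V: (T, d_v*H), already projected.
        d_k = q.shape[0] // head_num
        d_v = V.shape[1] // head_num
        heads = []
        for i in range(head_num):
            qs = q[i * d_k:(i + 1) * d_k]
            Ks = K[:, i * d_k:(i + 1) * d_k]
            Vs = V[:, i * d_v:(i + 1) * d_v]
            scores = Ks @ qs / np.sqrt(d_k)   # scaled dot-product, as above
            w = np.exp(scores - scores.max())
            w /= w.sum()                      # sequence softmax per head
            heads.append(w @ Vs)
        return np.concatenate(heads)          # concat_layer over the heads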
- - :param layer: - :type layer: LayerOutput - :return: - """ - if layer in traveled: - return [] - else: - traveled.add(layer) - - assert isinstance(layer, LayerOutput), "layer is %s" % (layer) - retv = [] - if layer.parents is not None: - for p in layer.parents: - retv.extend(__dfs_travel__(p, predicate)) - - if predicate(layer): - retv.append(layer) - return retv - - if isinstance(layers, LayerOutput): - layers = [layers] - - if len(args) != 0: - layers.extend(args) - - assert len(layers) > 0 - - if HasInputsSet(): # input already set - Outputs(*[l.name for l in layers]) - return # just return outputs. - - if len(layers) != 1: - logger.warning("`outputs` routine try to calculate network's" - " inputs and outputs order. It might not work well." - "Please see follow log carefully.") - inputs = [] - outputs_ = [] - for each_layer in layers: - assert isinstance(each_layer, LayerOutput) - inputs.extend(__dfs_travel__(each_layer)) - outputs_.extend( - __dfs_travel__(each_layer, - lambda x: x.layer_type == LayerType.COST)) - - # Currently, we got each leaf node's inputs order, output order. - # We merge them together. - - final_inputs = [] - final_outputs = [] - - for each_input in inputs: - assert isinstance(each_input, LayerOutput) - if each_input.name not in final_inputs: - final_inputs.append(each_input.name) - - for each_output in outputs_: - assert isinstance(each_output, LayerOutput) - if each_output.name not in final_outputs: - final_outputs.append(each_output.name) - - logger.info("".join(["The input order is [", ", ".join(final_inputs), "]"])) - - if len(final_outputs) == 0: - final_outputs = map(lambda x: x.name, layers) - - logger.info("".join( - ["The output order is [", ", ".join(final_outputs), "]"])) - - Inputs(*final_inputs) - Outputs(*final_outputs) diff --git a/python/paddle/trainer_config_helpers/optimizers.py b/python/paddle/trainer_config_helpers/optimizers.py deleted file mode 100644 index 32698e5b2c..0000000000 --- a/python/paddle/trainer_config_helpers/optimizers.py +++ /dev/null @@ -1,447 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import Settings, default_decay_rate, \ - default_gradient_clipping_threshold, default_momentum - -from .default_decorators import wrap_param_default - -__all__ = [ - 'Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', 'AdamaxOptimizer', - 'AdamOptimizer', 'AdaGradOptimizer', 'RMSPropOptimizer', - 'DecayedAdaGradOptimizer', 'AdaDeltaOptimizer', 'BaseRegularization', - 'L2Regularization', 'settings', 'ModelAverage' -] - - -class Optimizer(object): - def to_setting_kwargs(self): - raise NotImplementedError() - - def extra_settings(self): - pass - - @property - def is_support_sparse(self): - return True - - -class BaseSGDOptimizer(Optimizer): - """ - SGD Optimizer. - - SGD is an optimization method, trying to find a neural network that - minimize the "cost/error" of it by iteration. 
In PaddlePaddle's implementation,
-    the SGD optimizer is synchronized, which means that all gradients are
-    gathered and reduced into one gradient before the optimization step is
-    applied.
-
-    The neural network considers the learning problem of minimizing an
-    objective function that has the form of a sum
-
-    .. math::
-
-        Q(w) = \\sum_{i}^{n} Q_i(w)
-
-    The value of the function Q is typically the cost of the neural network
-    (for example, the mean squared error between prediction and label). The
-    function Q is parameterized by w, the weights/biases of the neural
-    network, which are what is to be learned. The index i denotes the i-th
-    observation in the (training) data.
-
-    So, the SGD method will optimize the weights by
-
-    .. math::
-
-        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
-
-    where :math:`\\eta` is the learning rate and :math:`n` is the batch size.
-    """
-
-    def to_setting_kwargs(self):
-        raise NotImplementedError()
-
-
-class MomentumOptimizer(BaseSGDOptimizer):
-    """
-    MomentumOptimizer.
-
-    When sparse=True, the update scheme is:
-
-    .. math::
-
-        \\alpha_t &= \\alpha_{t-1} / k \\\\
-        \\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\
-        u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\
-        v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\
-        \\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t
-
-    where :math:`k` is the momentum, :math:`\\lambda` is the decay rate, and
-    :math:`\\gamma_t` is the learning rate at the t-th step.
-
-    :param sparse: enable sparse update support or not.
-    :type sparse: bool
-    """
-
-    def extra_settings(self):
-        default_momentum(self.momentum)
-
-    def to_setting_kwargs(self):
-        if self.sparse:
-            return {'learning_method': 'sparse_momentum'}
-        else:
-            return {'learning_method': 'momentum'}
-
-    def __init__(self, momentum=None, sparse=False):
-        self.momentum = momentum
-        self.sparse = sparse
-
-
-class AdamOptimizer(BaseSGDOptimizer):
-    """
-    Adam optimizer.
-    For details, please refer to `Adam: A Method for Stochastic Optimization
-    <https://arxiv.org/abs/1412.6980>`_
-
-    .. math::
-
-        m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
-        v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
-        w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
-
-    :param beta1: the :math:`\\beta_1` in the equation.
-    :type beta1: float
-    :param beta2: the :math:`\\beta_2` in the equation.
-    :type beta2: float
-    :param epsilon: the :math:`\\epsilon` in the equation. It is used to
-                    prevent division by zero.
-    :type epsilon: float
-    """
-
-    @property
-    def is_support_sparse(self):
-        return False
-
-    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8):
-        self.beta1 = beta1
-        self.beta2 = beta2
-        self.epsilon = epsilon
-
-    def to_setting_kwargs(self):
-        return {
-            'learning_method': 'adam',
-            'adam_beta1': self.beta1,
-            'adam_beta2': self.beta2,
-            'adam_epsilon': self.epsilon
-        }
-
-
-class AdamaxOptimizer(BaseSGDOptimizer):
-    """
-    Adamax optimizer.
-
-    For details, please refer to `Adam: A Method for Stochastic Optimization
-    <https://arxiv.org/abs/1412.6980>`_
-
-    .. math::
-
-        m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
-        u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
-        w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
-
-    :param beta1: the :math:`\\beta_1` in the equation.
-    :type beta1: float
-    :param beta2: the :math:`\\beta_2` in the equation.
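A NumPy sketch of the Adam update as written in the docstring above (note that, like the docstring, it omits bias correction; all names are hypothetical):

.. code-block:: python

    import numpy as np

    def adam_update(w, g, m, v, eta=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        # m and v are the running first/second moment estimates m(w, t), v(w, t).
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g * g
        # Same form as the equation above: w -= eta * m / sqrt(v + eps).
        w = w - eta * m / np.sqrt(v + eps)
        return w, m, v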
- :type beta2: float - """ - - def __init__(self, beta1, beta2): - self.beta1 = beta1 - self.beta2 = beta2 - - def to_setting_kwargs(self): - return { - 'learning_method': 'adamax', - 'adam_beta1': self.beta1, - 'adam_beta2': self.beta2 - } - - @property - def is_support_sparse(self): - return False - - -class AdaGradOptimizer(BaseSGDOptimizer): - """ - Adagrad(for ADAptive GRAdient algorithm) optimizer. - - For details please refer this `Adaptive Subgradient Methods for - Online Learning and Stochastic Optimization - `_. - - .. math:: - - G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\ - w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g - """ - - def to_setting_kwargs(self): - return {'learning_method': 'adagrad'} - - def __init__(self): - pass - - -class RMSPropOptimizer(BaseSGDOptimizer): - """ - RMSProp(for Root Mean Square Propagation) optimizer. For details please - refer this `slide `_. - - The equations of this method as follows: - - .. math:: - - v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\ - w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w) - - :param rho: the :math:`\\rho` in the equation. The forgetting factor. - :type rho: float - :param epsilon: the :math:`\\epsilon` in the equation. - :type epsilon: float - """ - - def to_setting_kwargs(self): - return { - 'learning_method': 'rmsprop', - 'ada_rou': self.rho, - 'ada_epsilon': self.epsilon - } - - def __init__(self, rho=0.95, epsilon=1e-6): - self.rho = rho - self.epsilon = epsilon - - -class DecayedAdaGradOptimizer(BaseSGDOptimizer): - """ - AdaGrad method with decayed sum gradients. The equations of this method - show as follow. - - .. math:: - - E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\ - learning\\_rate &= 1/sqrt( ( E(g_t^2) + \\epsilon ) - - :param rho: The :math:`\\rho` parameter in that equation - :type rho: float - :param epsilon: The :math:`\\epsilon` parameter in that equation. - :type epsilon: float - """ - - def to_setting_kwargs(self): - return { - 'learning_method': 'decayed_adagrad', - 'ada_rou': self.rho, - 'ada_epsilon': self.epsilon - } - - def __init__(self, rho=0.95, epsilon=1e-6): - self.rho = rho - self.epsilon = epsilon - - -class AdaDeltaOptimizer(BaseSGDOptimizer): - """ - AdaDelta method. The details of adadelta please refer to this - `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD - `_. - - .. 
math:: - - E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\ - learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\ - E(g_t^2) + \\epsilon ) ) \\\\ - E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2 - - :param rho: :math:`\\rho` in equation - :type rho: float - :param epsilon: :math:`\\rho` in equation - :type epsilon: float - """ - - def to_setting_kwargs(self): - return { - 'learning_method': 'adadelta', - 'ada_rou': self.rho, - 'ada_epsilon': self.epsilon - } - - def __init__(self, rho=0.95, epsilon=1e-6): - self.rho = rho - self.epsilon = epsilon - - -class BaseRegularization(Optimizer): - def __init__(self): - self.algorithm = "" - self.learning_method = "" - - def to_setting_kwargs(self): - return {} - - -class L2Regularization(BaseRegularization): - def __init__(self, rate): - super(L2Regularization, self).__init__() - self.decay_rate = rate - - def to_setting_kwargs(self): - if self.algorithm == 'owlqn': - return {'l2weight': self.decay_rate} - else: - return dict() - - def extra_settings(self): - if self.algorithm == 'sgd' or self.algorithm == 'async_sgd': - default_decay_rate(self.decay_rate) - - -class ModelAverage(Optimizer): - def to_setting_kwargs(self): - return { - 'average_window': self.average_window, - 'max_average_window': self.max_average_window, - 'do_average_in_cpu': self.do_average_in_cpu - } - - def __init__(self, - average_window, - max_average_window=None, - do_average_in_cpu=False): - self.average_window = average_window - self.max_average_window = max_average_window - self.do_average_in_cpu = do_average_in_cpu - - -class GradientClippingThreshold(Optimizer): - def extra_settings(self): - default_gradient_clipping_threshold(self.threshold) - - def __init__(self, threshold): - self.threshold = threshold - - def to_setting_kwargs(self): - return dict() - - -def __extends__(dict1, dict2): - for key in dict2: - assert key not in dict1 - dict1[key] = dict2[key] - return dict1 - - -@wrap_param_default( - ['learning_method'], default_factory=lambda _: MomentumOptimizer()) -@wrap_param_default( - ['regularization'], default_factory=lambda _: BaseRegularization()) -def settings(batch_size, - learning_rate=1e-3, - learning_rate_decay_a=0., - learning_rate_decay_b=0., - learning_rate_schedule='poly', - learning_rate_args='', - async_lagged_grad_discard_ratio=1.5, - learning_method=None, - regularization=None, - is_async=False, - model_average=None, - gradient_clipping_threshold=None): - """ - Set the optimization method, learning rate, batch size, and other training - settings. The currently supported algorithms are SGD and Async-SGD. - - .. warning:: - - Note that the 'batch_size' in PaddlePaddle is not equal to global - training batch size. It represents the single training process's batch - size. If you use N processes to train one model, for example use three - GPU machines, the global batch size is N*'batch_size'. - - :param batch_size: batch size for one training process. - :type batch_size: int - :param learning_rate: learning rate for SGD - :type learning_rate: float - :param learning_method: The extension optimization algorithms of gradient - descent, such as momentum, adagrad, rmsprop, etc. - Note that it should be instance with base type - BaseSGDOptimizer. - :type learning_method: BaseSGDOptimizer - :param regularization: The regularization method. - :type regularization: BaseRegularization - :param is_async: Is Async-SGD or not. Default value is False. - :type is_async: bool - :param model_average: Model Average Settings. 
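For reference, a typical invocation of settings() with the classes defined in this file might look as follows (the values are purely illustrative, not recommendations):

.. code-block:: python

    from paddle.trainer_config_helpers import *

    settings(
        batch_size=128,
        learning_rate=1e-3,
        learning_method=AdamOptimizer(beta1=0.9, beta2=0.999),
        regularization=L2Regularization(rate=8e-4),
        gradient_clipping_threshold=25,
        model_average=ModelAverage(average_window=0.5))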
- :type model_average: ModelAverage - :param gradient_clipping_threshold: gradient clipping threshold. If gradient - value larger than some value, will be - clipped. - :type gradient_clipping_threshold: float - :param async_lagged_grad_discard_ratio: async SGD gradient commit control, - when async_lagged_grad_discard_ratio * num_gradient_servers commit passed, - the current async SGD gradient is discarded. - :type async_lagged_grad_discard_ratio: float - """ - if isinstance(regularization, BaseRegularization): - regularization = [regularization] - - assert isinstance(learning_method, Optimizer) - if isinstance(learning_method, BaseSGDOptimizer): - algorithm = 'async_sgd' if is_async else 'sgd' - else: - algorithm = 'owlqn' - - args = [ - 'batch_size', 'learning_rate', 'learning_rate_decay_a', - 'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args', - 'gradient_clipping_threshold', 'async_lagged_grad_discard_ratio' - ] - kwargs = dict() - kwargs['algorithm'] = algorithm - for arg in args: - kwargs[arg] = locals()[arg] - - kwargs = __extends__(kwargs, learning_method.to_setting_kwargs()) - learning_method.extra_settings() - - for regular in regularization: - assert isinstance(regular, BaseRegularization) - regular.algorithm = algorithm - regular.learning_method = kwargs['learning_method'] - kwargs = __extends__(kwargs, regular.to_setting_kwargs()) - regular.extra_settings() - - if gradient_clipping_threshold is not None: - gradient_clipping_threshold = GradientClippingThreshold( - threshold=gradient_clipping_threshold) - - for each in [model_average, gradient_clipping_threshold]: - if each is not None: - assert isinstance(each, Optimizer) - each.algorithm = algorithm - each.learning_method = kwargs['learning_method'] - kwargs = __extends__(kwargs, each.to_setting_kwargs()) - each.extra_settings() - - # Do Check? - Settings(**kwargs) diff --git a/python/paddle/trainer_config_helpers/poolings.py b/python/paddle/trainer_config_helpers/poolings.py deleted file mode 100644 index e0aeb311b3..0000000000 --- a/python/paddle/trainer_config_helpers/poolings.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -""" - -__all__ = [ - "BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling", - "CudnnMaxPooling", "CudnnAvgPooling", "CudnnAvgInclPadPooling", - "SumPooling", "SquareRootNPooling" -] - - -class BasePoolingType(object): - """ - Base Pooling Type. - Note these pooling types are used for sequence input, not for images. - Each PoolingType contains one parameter: - - :param name: pooling layer type name used by paddle. - :type name: basestring - - """ - - def __init__(self, name): - self.name = name - - -class MaxPooling(BasePoolingType): - """ - Max pooling. - - Return the very large values for each dimension in sequence or time steps. - - .. math:: - - max(samples\\_of\\_a\\_sequence) - - :param output_max_index: True if output sequence max index instead of max - value. 
None means use the default value in proto.
-    :type output_max_index: bool|None
-    """
-
-    def __init__(self, output_max_index=None):
-        BasePoolingType.__init__(self, "max")
-        self.output_max_index = output_max_index
-
-
-class MaxWithMaskPooling(BasePoolingType):
-    """
-    MaxWithMask pooling.
-
-    Returns not only the maximum value of each dimension over the sequence or
-    time steps, but also the location indices of the maxima found.
-
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "max-pool-with-mask")
-
-
-class CudnnMaxPooling(BasePoolingType):
-    """
-    Cudnn max pooling, only supported on GPU. Returns the maximum value in
-    the pooling window.
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "cudnn-max-pool")
-
-
-class CudnnAvgPooling(BasePoolingType):
-    """
-    Cudnn average pooling, only supported on GPU. Returns the average value
-    in the pooling window.
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "cudnn-avg-pool")
-
-
-class CudnnAvgInclPadPooling(BasePoolingType):
-    """
-    Cudnn average pooling, only supported on GPU. Returns the average value
-    in the pooling window, taking the padding cells into account.
-    """
-
-    def __init__(self):
-        BasePoolingType.__init__(self, "cudnn-avg-incl-pad-pool")
-
-
-class AvgPooling(BasePoolingType):
-    """
-    Average pooling.
-
-    Returns the average value of each dimension over the sequence or time
-    steps.
-
-    .. math::
-
-        sum(samples\\_of\\_a\\_sequence)/sample\\_num
-    """
-    STRATEGY_AVG = "average"
-    STRATEGY_SUM = "sum"
-    STRATEGY_SQROOTN = "squarerootn"
-
-    def __init__(self, strategy=STRATEGY_AVG):
-        BasePoolingType.__init__(self, "average")
-        self.strategy = strategy
-
-
-class SumPooling(AvgPooling):
-    """
-    Sum pooling.
-
-    Returns the sum of each dimension over the sequence or time steps.
-
-    .. math::
-
-        sum(samples\\_of\\_a\\_sequence)
-    """
-
-    def __init__(self):
-        AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
-
-
-class SquareRootNPooling(AvgPooling):
-    """
-    Square Root N Pooling.
-
-    Returns the sum of each dimension over the sequence or time steps,
-    divided by the square root of the sequence length.
-
-    ..
math:: - - sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num) - """ - - def __init__(self): - AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN) diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt deleted file mode 100644 index 30e0b9906c..0000000000 --- a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -#################### test_config_parser ######################### -add_test(NAME layers_test - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/ - ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) - -add_test(NAME test_reset_hook - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/ - ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) - -add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp) -add_test(NAME test_layerHelpers - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/ - ${PADDLE_BINARY_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} - ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal -) diff --git a/python/paddle/trainer_config_helpers/tests/ProtobufEqualMain.cpp b/python/paddle/trainer_config_helpers/tests/ProtobufEqualMain.cpp deleted file mode 100644 index 7b10e0b7a6..0000000000 --- a/python/paddle/trainer_config_helpers/tests/ProtobufEqualMain.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include -#include "TrainerConfig.pb.h" - -bool loadPb(google::protobuf::Message* conf, const std::string& filename) { - std::ifstream fin; - fin.open(filename.c_str()); - if (fin.is_open()) { - std::string str((std::istreambuf_iterator(fin)), - std::istreambuf_iterator()); - bool ok = google::protobuf::TextFormat::ParseFromString(str, conf); - fin.close(); - return ok; - } else { - return false; - } -} - -int main(int argc, char** argv) { - std::unique_ptr config1; - std::unique_ptr config2; - if (argc == 3) { - config1.reset(new paddle::ModelConfig()); - config2.reset(new paddle::ModelConfig()); - } else if (argc == 4) { - config1.reset(new paddle::TrainerConfig()); - config2.reset(new paddle::TrainerConfig()); - } - if (!config1 || !config2) { - return 1; - } else if (!loadPb(config1.get(), argv[1])) { - return 2; - } else if (!loadPb(config2.get(), argv[2])) { - return 3; - } else { - if (google::protobuf::util::MessageDifferencer::ApproximatelyEquals( - *config1, *config2)) { - return 0; - } else { - return 4; - } - } -} diff --git a/python/paddle/trainer_config_helpers/tests/configs/.gitignore b/python/paddle/trainer_config_helpers/tests/configs/.gitignore deleted file mode 100644 index c654bd41b0..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -protostr/*.unittest diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh deleted file mode 100755 index 10c941f707..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer -test_sequence_pooling test_lstmemory_layer test_grumemory_layer -last_first_seq test_expand_layer test_ntm_layers test_hsigmoid -img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers -test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight -test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops -test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer -test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer -test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer -test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer -test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer -test_scale_sub_region_layer test_dot_prod_layer test_l2_distance_layer -test_factorization_machine) - -export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh deleted file mode 100755 index 44a75a60cc..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -e -cd `dirname $0` - -protostr=$PWD/protostr -. file_list.sh - -for conf in ${configs[*]} -do - echo "Generating " $conf - $1 -m paddle.utils.dump_config $conf.py > $protostr/$conf.protostr.unittest - if [ ! 
-f "$protostr/$conf.protostr" ]; then - cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr - fi - cat ${conf}.py |$1 test_config_parser_for_non_file_config.py > $protostr/$conf.protostr.non_file_config.unittest -done - -for conf in ${whole_configs[*]} -do - echo "Generating " $conf - $1 -m paddle.utils.dump_config $conf.py "" --whole > $protostr/$conf.protostr.unittest - if [ ! -f "$protostr/$conf.protostr" ]; then - cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr - fi - cat ${conf}.py |$1 test_config_parser_for_non_file_config.py --whole > $protostr/$conf.protostr.non_file_config.unittest -done diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py deleted file mode 100644 index 767b645424..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-3, batch_size=1000) - -img = data_layer(name='image', size=256 * 256) - -# the parse_conv in config_parse.py is not strictly accurate when filter_size -# is not square. So here set square filter_size. -img_conv = img_conv_layer( - input=img, - num_channels=1, - num_filters=64, - filter_size=(32, 32), - padding=(1, 1), - dilation=(1, 1), - stride=(1, 1), - act=LinearActivation()) -img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) - -img_norm = img_cmrnorm_layer(input=img_bn, size=32) - -img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) - -outputs(img_pool, img_norm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py deleted file mode 100644 index e17c8fa7c0..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-3, batch_size=1000) - -img = data_layer(name='image', size=227 * 227) - -# the parse_conv in config_parse.py is not strictly accurate when filter_size -# is not square. So here set square filter_size. 
-img_conv = img_conv_layer( - input=img, - num_channels=1, - num_filters=64, - filter_size=(32, 32), - padding=(1, 1), - stride=(1, 1), - act=LinearActivation(), - trans=True) -img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) - -img_norm = img_cmrnorm_layer(input=img_bn, size=32) - -img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) - -outputs(img_pool, img_norm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py deleted file mode 100644 index 5b6d2627e4..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=30) - -seq_op = [first_seq, last_seq] - -agg_level = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE] - -opts = [] - -for op in seq_op: - for al in agg_level: - opts.append(op(input=din, agg_level=al)) - -for op in seq_op: - opts.append( - op(input=din, agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=5)) - -outputs(opts) diff --git a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py deleted file mode 100644 index ac1f7e02c0..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -Test all activations. 
-'''
-
-from paddle.trainer_config_helpers import *
-
-settings(learning_rate=1e-4, batch_size=1000)
-
-din = data_layer(name='input', size=100)
-
-acts = [
-    TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation,
-    LinearActivation, ExpActivation, ReluActivation, BReluActivation,
-    SoftReluActivation, STanhActivation, AbsActivation, SquareActivation
-]
-
-outputs([
-    fc_layer(
-        input=din, size=100, act=act(), name="layer_%d" % i)
-    for i, act in enumerate(acts)
-])
diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
deleted file mode 100644
index 29dc634fb3..0000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-settings(batch_size=1000, learning_rate=1e-5)
-
-x = data_layer(name='data', size=100)
-x = layer_math.exp(x)
-x = layer_math.sqrt(x)
-x = layer_math.reciprocal(x)
-x = layer_math.log(x)
-x = layer_math.abs(x)
-x = layer_math.sigmoid(x)
-x = layer_math.tanh(x)
-x = layer_math.square(x)
-x = layer_math.relu(x)
-y = 1 + x
-y = y + 1
-y = x + y
-y = y - x
-y = y - 2
-y = 2 - y
-y = 2 * y
-y = y * 3
-z = data_layer(name='data_2', size=1)
-y = y * z
-y = z * y
-y = y + z
-y = z + y
-outputs(y)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py
deleted file mode 100644
index 3b7a196d1c..0000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/projections.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-'''
-Test mixed layer, projections and operators.
-'''
-from paddle.trainer_config_helpers import *
-
-settings(batch_size=1000, learning_rate=1e-4)
-
-din = data_layer(name='test', size=100)
-
-din = embedding_layer(input=din, size=256)
-
-with mixed_layer(size=100) as m1:
-    m1 += full_matrix_projection(input=din)
-
-with mixed_layer(size=100) as m2:
-    m2 += table_projection(input=m1)
-
-with mixed_layer(size=100) as m3:
-    m3 += identity_projection(input=m2)
-
-with mixed_layer(size=100) as m4:
-    m4 += dotmul_projection(input=m3)
-
-with mixed_layer() as m5:
-    m5 += context_projection(input=m4, context_len=3)
-
-with mixed_layer() as m6:
-    m6 += dotmul_operator(a=m3, b=m4)
-    m6 += scaling_projection(m3)
-
-img = data_layer(name='img', size=32 * 32)
-flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
-
-with mixed_layer() as m7:
-    m7 += conv_operator(
-        img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3)
-    m7 += conv_projection(img, filter_size=3, num_filters=64, num_channels=1)
-
-with mixed_layer() as m8:
-    m8 += conv_operator(
-        img=img,
-        filter=flt,
-        num_filters=64,
-        num_channels=1,
-        filter_size=3,
-        stride=2,
-        padding=1,
-        trans=True)
-    m8 += conv_projection(
-        img,
-        filter_size=3,
-        num_filters=64,
-        num_channels=1,
-        stride=2,
-        padding=1,
-        trans=True)
-end = mixed_layer(
-    input=[
-        full_matrix_projection(input=m5),
-        trans_full_matrix_projection(input=m6),
-        full_matrix_projection(input=m7), full_matrix_projection(input=m8)
-    ],
-    size=100,
-    layer_attr=ExtraAttr(
-        drop_rate=0.5, error_clipping_threshold=40))
-
-outputs(end)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
deleted file mode 100644
index 3e0f957648..0000000000
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
+++ /dev/null
@@ -1,193 +0,0 @@
-type: "nn"
-layers {
-  name: "image"
-  type: "data"
-  size: 65536
-  active_type: ""
-}
-layers {
-  name: "__conv_0__"
-  type: "exconv"
-  size: 3297856
-  active_type: ""
-  inputs {
-    input_layer_name: "image"
-    input_parameter_name: "___conv_0__.w0"
-    conv_conf {
-      filter_size: 32
-      channels: 1
-      stride: 1
-      padding: 1
-      groups: 1
-      filter_channels: 1
-      output_x: 227
-      img_size: 256
-      caffe_mode: true
-      filter_size_y: 32
-      padding_y: 1
-      stride_y: 1
-      output_y: 227
-      img_size_y: 256
-      dilation: 1
-      dilation_y: 1
-    }
-  }
-  bias_parameter_name: "___conv_0__.wbias"
-  num_filters: 64
-  shared_biases: true
-  height: 227
-  width: 227
-}
-layers {
-  name: "__batch_norm_0__"
-  type: "batch_norm"
-  size: 3297856
-  active_type: "relu"
-  inputs {
-    input_layer_name: "__conv_0__"
-    input_parameter_name: "___batch_norm_0__.w0"
-    image_conf {
-      channels: 64
-      img_size: 227
-      img_size_y: 227
-    }
-  }
-  inputs {
-    input_layer_name: "__conv_0__"
-    input_parameter_name: "___batch_norm_0__.w1"
-  }
-  inputs {
-    input_layer_name: "__conv_0__"
-    input_parameter_name: "___batch_norm_0__.w2"
-  }
-  bias_parameter_name: "___batch_norm_0__.wbias"
-  moving_average_fraction: 0.9
-  height: 227
-  width: 227
-  depth: 1
-  epsilon: 1e-05
-}
-layers {
-  name: "__crmnorm_0__"
-  type: "norm"
-  size: 3297856
-  active_type: ""
-  inputs {
-    input_layer_name: "__batch_norm_0__"
-    norm_conf {
-      norm_type: "cmrnorm-projection"
-      channels: 64
-      size: 32
-      scale: 0.0004
-      pow: 0.75
-      output_x: 227
-      img_size: 227
-      blocked: false
-      output_y: 227
-      img_size_y: 227
-    }
-  }
-  height: 227
-  width: 227
-}
-layers {
-  name: "__pool_0__"
-  type: "pool"
-  size: 2458624
-  active_type: ""
-  inputs
{ - input_layer_name: "__conv_0__" - pool_conf { - pool_type: "max-projection" - channels: 64 - size_x: 32 - stride: 1 - output_x: 196 - img_size: 227 - padding: 0 - size_y: 32 - stride_y: 1 - output_y: 196 - img_size_y: 227 - padding_y: 0 - } - } - height: 196 - width: 196 -} -parameters { - name: "___conv_0__.w0" - size: 65536 - initial_mean: 0.0 - initial_std: 0.0441941738242 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 64 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w0" - size: 64 - initial_mean: 1.0 - initial_std: 0.0 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w1" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.w2" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "image" -output_layer_names: "__pool_0__" -output_layer_names: "__crmnorm_0__" -sub_models { - name: "root" - layer_names: "image" - layer_names: "__conv_0__" - layer_names: "__batch_norm_0__" - layer_names: "__crmnorm_0__" - layer_names: "__pool_0__" - input_layer_names: "image" - output_layer_names: "__pool_0__" - output_layer_names: "__crmnorm_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr deleted file mode 100644 index a18a4652e1..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ /dev/null @@ -1,193 +0,0 @@ -type: "nn" -layers { - name: "image" - type: "data" - size: 51529 - active_type: "" -} -layers { - name: "__conv_0__" - type: "exconvt" - size: 4194304 - active_type: "" - inputs { - input_layer_name: "image" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 32 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 64 - output_x: 227 - img_size: 256 - caffe_mode: true - filter_size_y: 32 - padding_y: 1 - stride_y: 1 - output_y: 227 - img_size_y: 256 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 64 - shared_biases: true - height: 256 - width: 256 -} -layers { - name: "__batch_norm_0__" - type: "batch_norm" - size: 4194304 - active_type: "relu" - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w0" - image_conf { - channels: 64 - img_size: 256 - img_size_y: 256 - } - } - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w1" - } - inputs { - input_layer_name: "__conv_0__" - input_parameter_name: "___batch_norm_0__.w2" - } - bias_parameter_name: "___batch_norm_0__.wbias" - moving_average_fraction: 0.9 - height: 256 - width: 256 - depth: 1 - epsilon: 1e-05 -} -layers { - name: "__crmnorm_0__" - type: "norm" - size: 4194304 - active_type: "" - inputs { - input_layer_name: "__batch_norm_0__" - norm_conf { - norm_type: "cmrnorm-projection" - channels: 64 - size: 32 - scale: 0.0004 - pow: 0.75 - output_x: 
256 - img_size: 256 - blocked: false - output_y: 256 - img_size_y: 256 - } - } - height: 256 - width: 256 -} -layers { - name: "__pool_0__" - type: "pool" - size: 3240000 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - pool_conf { - pool_type: "max-projection" - channels: 64 - size_x: 32 - stride: 1 - output_x: 225 - img_size: 256 - padding: 0 - size_y: 32 - stride_y: 1 - output_y: 225 - img_size_y: 256 - padding_y: 0 - } - } - height: 225 - width: 225 -} -parameters { - name: "___conv_0__.w0" - size: 65536 - initial_mean: 0.0 - initial_std: 0.0441941738242 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 64 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w0" - size: 64 - initial_mean: 1.0 - initial_std: 0.0 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w1" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.w2" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.wbias" - size: 64 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 64 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "image" -output_layer_names: "__pool_0__" -output_layer_names: "__crmnorm_0__" -sub_models { - name: "root" - layer_names: "image" - layer_names: "__conv_0__" - layer_names: "__batch_norm_0__" - layer_names: "__crmnorm_0__" - layer_names: "__pool_0__" - input_layer_names: "image" - output_layer_names: "__pool_0__" - output_layer_names: "__crmnorm_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr deleted file mode 100644 index fee0f8e462..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr +++ /dev/null @@ -1,102 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__first_seq_0__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - select_first: true - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_1__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_2__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__last_seq_2__" - type: "seqlastins" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - trans_type: "non-seq" - seq_pool_stride: 5 -} -input_layer_names: "data" -output_layer_names: "__first_seq_0__" -output_layer_names: 
"__first_seq_1__" -output_layer_names: "__last_seq_0__" -output_layer_names: "__last_seq_1__" -output_layer_names: "__first_seq_2__" -output_layer_names: "__last_seq_2__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__first_seq_0__" - layer_names: "__first_seq_1__" - layer_names: "__last_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__first_seq_2__" - layer_names: "__last_seq_2__" - input_layer_names: "data" - output_layer_names: "__first_seq_0__" - output_layer_names: "__first_seq_1__" - output_layer_names: "__last_seq_0__" - output_layer_names: "__last_seq_1__" - output_layer_names: "__first_seq_2__" - output_layer_names: "__last_seq_2__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr deleted file mode 100644 index ecf39e4d32..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/layer_activations.protostr +++ /dev/null @@ -1,423 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "layer_0" - type: "fc" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_0.w0" - } - bias_parameter_name: "_layer_0.wbias" -} -layers { - name: "layer_1" - type: "fc" - size: 100 - active_type: "sigmoid" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_1.w0" - } - bias_parameter_name: "_layer_1.wbias" -} -layers { - name: "layer_2" - type: "fc" - size: 100 - active_type: "softmax" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_2.w0" - } - bias_parameter_name: "_layer_2.wbias" -} -layers { - name: "layer_3" - type: "fc" - size: 100 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_3.w0" - } - bias_parameter_name: "_layer_3.wbias" -} -layers { - name: "layer_4" - type: "fc" - size: 100 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_4.w0" - } - bias_parameter_name: "_layer_4.wbias" -} -layers { - name: "layer_5" - type: "fc" - size: 100 - active_type: "exponential" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_5.w0" - } - bias_parameter_name: "_layer_5.wbias" -} -layers { - name: "layer_6" - type: "fc" - size: 100 - active_type: "relu" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_6.w0" - } - bias_parameter_name: "_layer_6.wbias" -} -layers { - name: "layer_7" - type: "fc" - size: 100 - active_type: "brelu" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_7.w0" - } - bias_parameter_name: "_layer_7.wbias" -} -layers { - name: "layer_8" - type: "fc" - size: 100 - active_type: "softrelu" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_8.w0" - } - bias_parameter_name: "_layer_8.wbias" -} -layers { - name: "layer_9" - type: "fc" - size: 100 - active_type: "stanh" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_9.w0" - } - bias_parameter_name: "_layer_9.wbias" -} -layers { - name: "layer_10" - type: "fc" - size: 100 - active_type: "abs" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_10.w0" - } - bias_parameter_name: "_layer_10.wbias" -} -layers { - name: "layer_11" - type: "fc" - size: 100 - active_type: "square" - inputs { - input_layer_name: "input" - input_parameter_name: "_layer_11.w0" - } - 
bias_parameter_name: "_layer_11.wbias" -} -parameters { - name: "_layer_0.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_0.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_1.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_1.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_2.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_2.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_3.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_3.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_4.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_4.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_5.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_5.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_6.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_6.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_7.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_7.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_8.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_8.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_9.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_9.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_layer_10.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_10.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: 
false -} -parameters { - name: "_layer_11.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_layer_11.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -output_layer_names: "layer_0" -output_layer_names: "layer_1" -output_layer_names: "layer_2" -output_layer_names: "layer_3" -output_layer_names: "layer_4" -output_layer_names: "layer_5" -output_layer_names: "layer_6" -output_layer_names: "layer_7" -output_layer_names: "layer_8" -output_layer_names: "layer_9" -output_layer_names: "layer_10" -output_layer_names: "layer_11" -sub_models { - name: "root" - layer_names: "input" - layer_names: "layer_0" - layer_names: "layer_1" - layer_names: "layer_2" - layer_names: "layer_3" - layer_names: "layer_4" - layer_names: "layer_5" - layer_names: "layer_6" - layer_names: "layer_7" - layer_names: "layer_8" - layer_names: "layer_9" - layer_names: "layer_10" - layer_names: "layer_11" - input_layer_names: "input" - output_layer_names: "layer_0" - output_layer_names: "layer_1" - output_layer_names: "layer_2" - output_layer_names: "layer_3" - output_layer_names: "layer_4" - output_layer_names: "layer_5" - output_layer_names: "layer_6" - output_layer_names: "layer_7" - output_layer_names: "layer_8" - output_layer_names: "layer_9" - output_layer_names: "layer_10" - output_layer_names: "layer_11" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr deleted file mode 100644 index 582207741a..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr +++ /dev/null @@ -1,413 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__exp_0__" - type: "mixed" - size: 100 - active_type: "exponential" - inputs { - input_layer_name: "data" - proj_conf { - type: "identity" - name: "___exp_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__sqrt_0__" - type: "mixed" - size: 100 - active_type: "sqrt" - inputs { - input_layer_name: "__exp_0__" - proj_conf { - type: "identity" - name: "___sqrt_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__reciprocal_0__" - type: "mixed" - size: 100 - active_type: "reciprocal" - inputs { - input_layer_name: "__sqrt_0__" - proj_conf { - type: "identity" - name: "___reciprocal_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__log_0__" - type: "mixed" - size: 100 - active_type: "log" - inputs { - input_layer_name: "__reciprocal_0__" - proj_conf { - type: "identity" - name: "___log_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__abs_0__" - type: "mixed" - size: 100 - active_type: "abs" - inputs { - input_layer_name: "__log_0__" - proj_conf { - type: "identity" - name: "___abs_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__sigmoid_0__" - type: "mixed" - size: 100 - active_type: "sigmoid" - inputs { - input_layer_name: "__abs_0__" - proj_conf { - type: "identity" - name: "___sigmoid_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__tanh_0__" - type: "mixed" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__sigmoid_0__" - proj_conf { - type: "identity" - name: 
"___tanh_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__square_0__" - type: "mixed" - size: 100 - active_type: "square" - inputs { - input_layer_name: "__tanh_0__" - proj_conf { - type: "identity" - name: "___square_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__relu_0__" - type: "mixed" - size: 100 - active_type: "relu" - inputs { - input_layer_name: "__square_0__" - proj_conf { - type: "identity" - name: "___relu_0__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__slope_intercept_layer_0__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__relu_0__" - } - slope: 1.0 - intercept: 1 -} -layers { - name: "__slope_intercept_layer_1__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_0__" - } - slope: 1.0 - intercept: 1 -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__relu_0__" - proj_conf { - type: "identity" - name: "___mixed_0__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__slope_intercept_layer_1__" - proj_conf { - type: "identity" - name: "___mixed_0__.w1" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__slope_intercept_layer_2__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__relu_0__" - } - slope: -1.0 - intercept: 0.0 -} -layers { - name: "__mixed_1__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_0__" - proj_conf { - type: "identity" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__slope_intercept_layer_2__" - proj_conf { - type: "identity" - name: "___mixed_1__.w1" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__slope_intercept_layer_3__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_1__" - } - slope: 1.0 - intercept: -2 -} -layers { - name: "__slope_intercept_layer_4__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_3__" - } - slope: -1.0 - intercept: 0.0 -} -layers { - name: "__slope_intercept_layer_5__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_4__" - } - slope: 1.0 - intercept: 2 -} -layers { - name: "__slope_intercept_layer_6__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_5__" - } - slope: 2 - intercept: 0.0 -} -layers { - name: "__slope_intercept_layer_7__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "__slope_intercept_layer_6__" - } - slope: 3 - intercept: 0.0 -} -layers { - name: "data_2" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__scaling_layer_0__" - type: "scaling" - size: 100 - active_type: "" - inputs { - input_layer_name: "data_2" - } - inputs { - input_layer_name: "__slope_intercept_layer_7__" - } -} -layers { - name: "__scaling_layer_1__" - type: "scaling" - size: 100 - active_type: "" - inputs { - input_layer_name: "data_2" - } - inputs { - input_layer_name: "__scaling_layer_0__" - } -} -layers { - name: "__repeat_layer_0__" - type: "featmap_expand" - size: 100 - active_type: "" - inputs { - input_layer_name: "data_2" - } - num_filters: 100 -} -layers { - name: "__mixed_2__" - type: "mixed" 
- size: 100 - active_type: "" - inputs { - input_layer_name: "__scaling_layer_1__" - proj_conf { - type: "identity" - name: "___mixed_2__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__repeat_layer_0__" - proj_conf { - type: "identity" - name: "___mixed_2__.w1" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__repeat_layer_1__" - type: "featmap_expand" - size: 100 - active_type: "" - inputs { - input_layer_name: "data_2" - } - num_filters: 100 -} -layers { - name: "__mixed_3__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_2__" - proj_conf { - type: "identity" - name: "___mixed_3__.w0" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__repeat_layer_1__" - proj_conf { - type: "identity" - name: "___mixed_3__.w1" - input_size: 100 - output_size: 100 - } - } -} -input_layer_names: "data_2" -input_layer_names: "data" -output_layer_names: "__mixed_3__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__exp_0__" - layer_names: "__sqrt_0__" - layer_names: "__reciprocal_0__" - layer_names: "__log_0__" - layer_names: "__abs_0__" - layer_names: "__sigmoid_0__" - layer_names: "__tanh_0__" - layer_names: "__square_0__" - layer_names: "__relu_0__" - layer_names: "__slope_intercept_layer_0__" - layer_names: "__slope_intercept_layer_1__" - layer_names: "__mixed_0__" - layer_names: "__slope_intercept_layer_2__" - layer_names: "__mixed_1__" - layer_names: "__slope_intercept_layer_3__" - layer_names: "__slope_intercept_layer_4__" - layer_names: "__slope_intercept_layer_5__" - layer_names: "__slope_intercept_layer_6__" - layer_names: "__slope_intercept_layer_7__" - layer_names: "data_2" - layer_names: "__scaling_layer_0__" - layer_names: "__scaling_layer_1__" - layer_names: "__repeat_layer_0__" - layer_names: "__mixed_2__" - layer_names: "__repeat_layer_1__" - layer_names: "__mixed_3__" - input_layer_names: "data_2" - input_layer_names: "data" - output_layer_names: "__mixed_3__" - is_recurrent_layer_group: false -} diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr deleted file mode 100644 index d8bd7b9dfb..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr +++ /dev/null @@ -1,466 +0,0 @@ -type: "nn" -layers { - name: "test" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__embedding_0__" - type: "mixed" - size: 256 - active_type: "" - inputs { - input_layer_name: "test" - input_parameter_name: "___embedding_0__.w0" - proj_conf { - type: "table" - name: "___embedding_0__.w0" - input_size: 100 - output_size: 256 - } - } -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__embedding_0__" - input_parameter_name: "___mixed_0__.w0" - proj_conf { - type: "fc" - name: "___mixed_0__.w0" - input_size: 256 - output_size: 100 - } - } -} -layers { - name: "__mixed_1__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_0__" - input_parameter_name: "___mixed_1__.w0" - proj_conf { - type: "table" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__mixed_2__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_1__" - proj_conf { - type: "identity" - name: "___mixed_2__.w0" - input_size: 100 - output_size: 100 - } - } -} 
-layers { - name: "__mixed_3__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_2__" - input_parameter_name: "___mixed_3__.w0" - proj_conf { - type: "dot_mul" - name: "___mixed_3__.w0" - input_size: 100 - output_size: 100 - } - } -} -layers { - name: "__mixed_4__" - type: "mixed" - size: 300 - active_type: "" - inputs { - input_layer_name: "__mixed_3__" - input_parameter_name: "___mixed_4__.w0" - proj_conf { - type: "context" - name: "___mixed_4__.w0" - input_size: 100 - output_size: 300 - context_start: -1 - context_length: 3 - trainable_padding: true - } - } -} -layers { - name: "__mixed_5__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_2__" - } - inputs { - input_layer_name: "__mixed_2__" - input_parameter_name: "___mixed_5__.w1" - proj_conf { - type: "scaling" - name: "___mixed_5__.w1" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__mixed_3__" - } - operator_confs { - type: "dot_mul" - input_indices: 0 - input_indices: 2 - input_sizes: 100 - input_sizes: 100 - output_size: 100 - dotmul_scale: 1 - } -} -layers { - name: "img" - type: "data" - size: 1024 - active_type: "" -} -layers { - name: "filter" - type: "data" - size: 576 - active_type: "" -} -layers { - name: "__mixed_6__" - type: "mixed" - size: 57600 - active_type: "" - inputs { - input_layer_name: "img" - } - inputs { - input_layer_name: "img" - input_parameter_name: "___mixed_6__.w1" - proj_conf { - type: "conv" - name: "___mixed_6__.w1" - input_size: 1024 - output_size: 57600 - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 0 - groups: 1 - filter_channels: 1 - output_x: 30 - img_size: 32 - caffe_mode: true - filter_size_y: 3 - padding_y: 0 - stride_y: 1 - output_y: 30 - img_size_y: 32 - } - num_filters: 64 - } - } - inputs { - input_layer_name: "filter" - } - operator_confs { - type: "conv" - input_indices: 0 - input_indices: 2 - input_sizes: 1024 - input_sizes: 576 - output_size: 57600 - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 0 - groups: 1 - filter_channels: 1 - output_x: 30 - img_size: 32 - caffe_mode: true - filter_size_y: 3 - padding_y: 0 - stride_y: 1 - output_y: 30 - img_size_y: 32 - } - num_filters: 64 - } -} -layers { - name: "__mixed_7__" - type: "mixed" - size: 254016 - active_type: "" - inputs { - input_layer_name: "img" - } - inputs { - input_layer_name: "img" - input_parameter_name: "___mixed_7__.w1" - proj_conf { - type: "convt" - name: "___mixed_7__.w1" - input_size: 1024 - output_size: 254016 - conv_conf { - filter_size: 3 - channels: 1 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 64 - output_x: 32 - img_size: 63 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 32 - img_size_y: 63 - } - num_filters: 64 - } - } - inputs { - input_layer_name: "filter" - } - operator_confs { - type: "convt" - input_indices: 0 - input_indices: 2 - input_sizes: 1024 - input_sizes: 576 - output_size: 254016 - conv_conf { - filter_size: 3 - channels: 1 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 64 - output_x: 32 - img_size: 63 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 32 - img_size_y: 63 - } - num_filters: 64 - } -} -layers { - name: "__mixed_8__" - type: "mixed" - size: 100 - active_type: "" - inputs { - input_layer_name: "__mixed_4__" - input_parameter_name: "___mixed_8__.w0" - proj_conf { - type: "fc" - name: "___mixed_8__.w0" - input_size: 300 - output_size: 100 - } - } - inputs { - 
input_layer_name: "__mixed_5__" - input_parameter_name: "___mixed_8__.w1" - proj_conf { - type: "trans_fc" - name: "___mixed_8__.w1" - input_size: 100 - output_size: 100 - } - } - inputs { - input_layer_name: "__mixed_6__" - input_parameter_name: "___mixed_8__.w2" - proj_conf { - type: "fc" - name: "___mixed_8__.w2" - input_size: 57600 - output_size: 100 - } - } - inputs { - input_layer_name: "__mixed_7__" - input_parameter_name: "___mixed_8__.w3" - proj_conf { - type: "fc" - name: "___mixed_8__.w3" - input_size: 254016 - output_size: 100 - } - } - drop_rate: 0.5 - error_clipping_threshold: 40.0 -} -parameters { - name: "___embedding_0__.w0" - size: 25600 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 256 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_0__.w0" - size: 25600 - initial_mean: 0.0 - initial_std: 0.0625 - dims: 256 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_1__.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_3__.w0" - size: 100 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_4__.w0" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 2 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___mixed_5__.w1" - size: 1 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_6__.w1" - size: 576 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___mixed_7__.w1" - size: 576 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___mixed_8__.w0" - size: 30000 - initial_mean: 0.0 - initial_std: 0.057735026919 - dims: 300 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_8__.w1" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_8__.w2" - size: 5760000 - initial_mean: 0.0 - initial_std: 0.00416666666667 - dims: 57600 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_8__.w3" - size: 25401600 - initial_mean: 0.0 - initial_std: 0.00198412698413 - dims: 254016 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "test" -input_layer_names: "img" -input_layer_names: "filter" -output_layer_names: "__mixed_8__" -sub_models { - name: "root" - layer_names: "test" - layer_names: "__embedding_0__" - layer_names: "__mixed_0__" - layer_names: "__mixed_1__" - layer_names: "__mixed_2__" - layer_names: "__mixed_3__" - layer_names: "__mixed_4__" - layer_names: "__mixed_5__" - layer_names: "img" - layer_names: "filter" - layer_names: "__mixed_6__" - layer_names: "__mixed_7__" - layer_names: "__mixed_8__" - input_layer_names: "test" - input_layer_names: "img" - input_layer_names: "filter" - output_layer_names: "__mixed_8__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr deleted file mode 100644 index 3e8633b079..0000000000 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_fc.protostr +++ /dev/null @@ -1,125 +0,0 @@ -type: "nn" -layers { - name: "feature_a" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "feature_b" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "feature_a" - input_parameter_name: "fc_param" - } - bias_parameter_name: "bias_param" -} -layers { - name: "__fc_layer_1__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "feature_b" - input_parameter_name: "fc_param" - } - bias_parameter_name: "bias_param" -} -layers { - name: "__fc_layer_2__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "softmax_param" - } - inputs { - input_layer_name: "__fc_layer_1__" - input_parameter_name: "softmax_param" - } -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_2__" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -parameters { - name: "fc_param" - size: 40000 - initial_mean: 0.0 - initial_std: 1.0 - dims: 200 - dims: 200 - initial_strategy: 1 - initial_smart: false -} -parameters { - name: "bias_param" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "softmax_param" - size: 2000 - initial_mean: 0.0 - initial_std: 1.0 - dims: 200 - dims: 10 - initial_strategy: 1 - initial_smart: false -} -input_layer_names: "feature_a" -input_layer_names: "feature_b" -input_layer_names: "label" -output_layer_names: "__cost_0__" -evaluators { - name: "classification_error_evaluator" - type: "classification_error" - input_layers: "__fc_layer_2__" - input_layers: "label" -} -sub_models { - name: "root" - layer_names: "feature_a" - layer_names: "feature_b" - layer_names: "__fc_layer_0__" - layer_names: "__fc_layer_1__" - layer_names: "__fc_layer_2__" - layer_names: "label" - layer_names: "__cost_0__" - input_layer_names: "feature_a" - input_layer_names: "feature_b" - input_layer_names: "label" - output_layer_names: "__cost_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr deleted file mode 100644 index 7254deb368..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr +++ /dev/null @@ -1,289 +0,0 @@ -type: "recurrent_nn" -layers { - name: "data_a" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "data_b" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__simple_gru_0___transform" - type: "mixed" - size: 600 - active_type: "" - inputs { - input_layer_name: "data_a" - input_parameter_name: "mixed_param" - proj_conf { - type: "fc" - name: "___simple_gru_0___transform.w0" - input_size: 100 - output_size: 600 - } - } -} -layers { - name: "__simple_gru_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - type: "scatter_agent" - size: 600 - active_type: "" -} -layers { - name: 
"__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "__simple_gru_0__@__simple_gru_0___recurrent_group" - type: "gru_step" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - input_parameter_name: "gru_param" - } - inputs { - input_layer_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - } - bias_parameter_name: "gru_bias" - active_gate_type: "sigmoid" -} -layers { - name: "__simple_gru_0__" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__simple_gru_1___transform" - type: "mixed" - size: 600 - active_type: "" - inputs { - input_layer_name: "data_b" - input_parameter_name: "mixed_param" - proj_conf { - type: "fc" - name: "___simple_gru_1___transform.w0" - input_size: 100 - output_size: 600 - } - } -} -layers { - name: "__simple_gru_1___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - type: "scatter_agent" - size: 600 - active_type: "" -} -layers { - name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "__simple_gru_1__@__simple_gru_1___recurrent_group" - type: "gru_step" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - input_parameter_name: "gru_param" - } - inputs { - input_layer_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - } - bias_parameter_name: "gru_bias" - active_gate_type: "sigmoid" -} -layers { - name: "__simple_gru_1__" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__simple_gru_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__simple_gru_1__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__last_seq_0__" - input_parameter_name: "softmax_param" - } - inputs { - input_layer_name: "__last_seq_1__" - input_parameter_name: "softmax_param" - } -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -parameters { - name: "mixed_param" - size: 60000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "gru_param" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "gru_bias" - size: 600 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 600 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "softmax_param" - size: 2000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data_a" -input_layer_names: "data_b" -input_layer_names: "label" -output_layer_names: "__cost_0__" -evaluators { - name: "classification_error_evaluator" - type: 
"classification_error" - input_layers: "__fc_layer_0__" - input_layers: "label" -} -sub_models { - name: "root" - layer_names: "data_a" - layer_names: "data_b" - layer_names: "__simple_gru_0___transform" - layer_names: "__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0__" - layer_names: "__simple_gru_1___transform" - layer_names: "__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1__" - layer_names: "__last_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__fc_layer_0__" - layer_names: "label" - layer_names: "__cost_0__" - input_layer_names: "data_a" - input_layer_names: "data_b" - input_layer_names: "label" - output_layer_names: "__cost_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} -sub_models { - name: "__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - layer_names: "__simple_gru_0__@__simple_gru_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group" - link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" - } - in_links { - layer_name: "__simple_gru_0___transform" - link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" - } - out_links { - layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group" - link_name: "__simple_gru_0__" - } -} -sub_models { - name: "__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - layer_names: "__simple_gru_1__@__simple_gru_1___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group" - link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" - } - in_links { - layer_name: "__simple_gru_1___transform" - link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" - } - out_links { - layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group" - link_name: "__simple_gru_1__" - } -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr deleted file mode 100644 index 75cf231203..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr +++ /dev/null @@ -1,385 +0,0 @@ -type: "recurrent_nn" -layers { - name: "data_a" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "data_b" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "data_a" - input_parameter_name: "mixed_param" - proj_conf { - type: "fc" - name: "___mixed_0__.w0" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__mixed_1__" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "data_b" - input_parameter_name: "mixed_param" - proj_conf { - type: "fc" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__mixed_0__@__lstm_group_0___recurrent_group" - type: "scatter_agent" - size: 400 - active_type: "" -} -layers { - name: 
"__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" - proj_conf { - type: "identity" - name: "___lstm_group_0___input_recurrent.w0" - input_size: 400 - output_size: 400 - } - } - inputs { - input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - input_parameter_name: "lstm_param" - proj_conf { - type: "fc" - name: "___lstm_group_0___input_recurrent.w1" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - type: "lstm_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - } - inputs { - input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - bias_parameter_name: "lstm_bias" - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - type: "get_output" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - input_layer_argument: "state" - } -} -layers { - name: "__lstm_group_0__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_1___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__mixed_1__@__lstm_group_1___recurrent_group" - type: "scatter_agent" - size: 400 - active_type: "" -} -layers { - name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "__mixed_1__@__lstm_group_1___recurrent_group" - proj_conf { - type: "identity" - name: "___lstm_group_1___input_recurrent.w0" - input_size: 400 - output_size: 400 - } - } - inputs { - input_layer_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - input_parameter_name: "lstm_param" - proj_conf { - type: "fc" - name: "___lstm_group_1___input_recurrent.w1" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - type: "lstm_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" - } - inputs { - input_layer_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - } - bias_parameter_name: "lstm_bias" - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" - type: "get_output" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - input_layer_argument: "state" - } -} -layers { - name: "__lstm_group_1__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: 
"__lstm_group_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_1__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__last_seq_0__" - input_parameter_name: "softmax_param" - } - inputs { - input_layer_name: "__last_seq_1__" - input_parameter_name: "softmax_param" - } -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -parameters { - name: "mixed_param" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "lstm_param" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "lstm_bias" - size: 300 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "softmax_param" - size: 1000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data_a" -input_layer_names: "data_b" -input_layer_names: "label" -output_layer_names: "__cost_0__" -evaluators { - name: "classification_error_evaluator" - type: "classification_error" - input_layers: "__fc_layer_0__" - input_layers: "label" -} -sub_models { - name: "root" - layer_names: "data_a" - layer_names: "data_b" - layer_names: "__mixed_0__" - layer_names: "__mixed_1__" - layer_names: "__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__" - layer_names: "__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1__" - layer_names: "__last_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__fc_layer_0__" - layer_names: "label" - layer_names: "__cost_0__" - input_layer_names: "data_a" - input_layer_names: "data_b" - input_layer_names: "label" - output_layer_names: "__cost_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} -sub_models { - name: "__lstm_group_0___recurrent_group" - layer_names: "__mixed_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - } - memories { - layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - in_links { - layer_name: "__mixed_0__" - link_name: "__mixed_0__@__lstm_group_0___recurrent_group" - } - out_links { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__" - } -} -sub_models { - name: "__lstm_group_1___recurrent_group" - layer_names: 
"__mixed_1__@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1__@__lstm_group_1___recurrent_group" - layer_names: "__lstm_group_1___state@__lstm_group_1___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" - } - memories { - layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" - link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" - } - in_links { - layer_name: "__mixed_1__" - link_name: "__mixed_1__@__lstm_group_1___recurrent_group" - } - out_links { - layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" - link_name: "__lstm_group_1__" - } -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr deleted file mode 100644 index 0d51f70ee0..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr +++ /dev/null @@ -1,424 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "data" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__recurrent_layer_0__" - type: "recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___recurrent_layer_0__.w0" - } - bias_parameter_name: "___recurrent_layer_0__.wbias" - reversed: false -} -layers { - name: "__recurrent_layer_1__" - type: "recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___recurrent_layer_1__.w0" - } - bias_parameter_name: "___recurrent_layer_1__.wbias" - reversed: true -} -layers { - name: "__fc_layer_1__" - type: "fc" - size: 800 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_1__.w0" - } -} -layers { - name: "__lstmemory_0__" - type: "lstmemory" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_1__" - input_parameter_name: "___lstmemory_0__.w0" - } - bias_parameter_name: "___lstmemory_0__.wbias" - reversed: false - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__fc_layer_2__" - type: "fc" - size: 800 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_2__.w0" - } -} -layers { - name: "__lstmemory_1__" - type: "lstmemory" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_2__" - input_parameter_name: "___lstmemory_1__.w0" - } - bias_parameter_name: "___lstmemory_1__.wbias" - reversed: true - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__fc_layer_3__" - type: "fc" - size: 600 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_3__.w0" - } -} -layers { - name: "__gru_0__" - type: "gated_recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: 
"__fc_layer_3__" - input_parameter_name: "___gru_0__.w0" - } - bias_parameter_name: "___gru_0__.wbias" - reversed: false - active_gate_type: "sigmoid" -} -layers { - name: "__fc_layer_4__" - type: "fc" - size: 600 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_4__.w0" - } -} -layers { - name: "__gru_1__" - type: "gated_recurrent" - size: 200 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_4__" - input_parameter_name: "___gru_1__.w0" - } - bias_parameter_name: "___gru_1__.wbias" - reversed: true - active_gate_type: "sigmoid" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__recurrent_layer_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__recurrent_layer_1__" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__lstmemory_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__lstmemory_1__" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__last_seq_2__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__gru_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__first_seq_2__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__gru_1__" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -parameters { - name: "___fc_layer_0__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___recurrent_layer_0__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___recurrent_layer_0__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___recurrent_layer_1__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___recurrent_layer_1__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_1__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 800 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_0__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_0__.wbias" - size: 1400 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1400 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_2__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 800 - 
initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_1__.w0" - size: 160000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_1__.wbias" - size: 1400 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1400 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_3__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_0__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_0__.wbias" - size: 600 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 600 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_4__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_1__.w0" - size: 120000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 600 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_1__.wbias" - size: 600 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 600 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__last_seq_0__" -output_layer_names: "__first_seq_0__" -output_layer_names: "__last_seq_1__" -output_layer_names: "__first_seq_1__" -output_layer_names: "__last_seq_2__" -output_layer_names: "__first_seq_2__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__fc_layer_0__" - layer_names: "__recurrent_layer_0__" - layer_names: "__recurrent_layer_1__" - layer_names: "__fc_layer_1__" - layer_names: "__lstmemory_0__" - layer_names: "__fc_layer_2__" - layer_names: "__lstmemory_1__" - layer_names: "__fc_layer_3__" - layer_names: "__gru_0__" - layer_names: "__fc_layer_4__" - layer_names: "__gru_1__" - layer_names: "__last_seq_0__" - layer_names: "__first_seq_0__" - layer_names: "__last_seq_1__" - layer_names: "__first_seq_1__" - layer_names: "__last_seq_2__" - layer_names: "__first_seq_2__" - input_layer_names: "data" - output_layer_names: "__last_seq_0__" - output_layer_names: "__first_seq_0__" - output_layer_names: "__last_seq_1__" - output_layer_names: "__first_seq_1__" - output_layer_names: "__last_seq_2__" - output_layer_names: "__first_seq_2__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr deleted file mode 100644 index 9b69ae4a3b..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr +++ /dev/null @@ -1,93 +0,0 @@ -type: "nn" -layers { - name: "data3D" - type: "data" - size: 360 - active_type: "" - height: 6 - width: 20 - depth: 3 -} -layers { - name: "__batch_norm_0__" - type: "batch_norm" - size: 360 - active_type: "relu" - inputs { - input_layer_name: "data3D" - input_parameter_name: "___batch_norm_0__.w0" - image_conf { - channels: 1 - img_size: 20 - img_size_y: 6 - img_size_z: 3 - } - } - inputs { - input_layer_name: "data3D" - input_parameter_name: "___batch_norm_0__.w1" - } - inputs { - input_layer_name: "data3D" - input_parameter_name: "___batch_norm_0__.w2" - } - 
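# The three batch_norm inputs just above are, in order, the learned scale
# (w0, initialized to mean 1.0) and the moving mean and variance
# statistics (w1 and w2, kept as static shared parameters); wbias is the
# learned shift, so the layer computes
# y = w0 * (x - mean) / sqrt(var + epsilon) + wbias. A hedged sketch of
# the deleted test_BatchNorm3D.py, assuming the legacy batch_norm_layer
# helper took an img3D switch:
from paddle.trainer_config_helpers import *

d3 = data_layer(name='data3D', size=360, height=6, width=20, depth=3)
bn = batch_norm_layer(input=d3, num_channels=1, img3D=True,
                      act=ReluActivation())
outputs(bn)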
bias_parameter_name: "___batch_norm_0__.wbias" - moving_average_fraction: 0.9 - height: 6 - width: 20 - depth: 3 - epsilon: 1e-05 -} -parameters { - name: "___batch_norm_0__.w0" - size: 1 - initial_mean: 1.0 - initial_std: 0.0 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___batch_norm_0__.w1" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.w2" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false - is_static: true - is_shared: true -} -parameters { - name: "___batch_norm_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data3D" -output_layer_names: "__batch_norm_0__" -sub_models { - name: "root" - layer_names: "data3D" - layer_names: "__batch_norm_0__" - input_layer_names: "data3D" - output_layer_names: "__batch_norm_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr deleted file mode 100644 index 8a1399efad..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr +++ /dev/null @@ -1,155 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 120 - active_type: "" -} -layers { - name: "__bidirectional_gru_0___fw_transform" - type: "mixed" - size: 120 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___bidirectional_gru_0___fw_transform.w0" - proj_conf { - type: "fc" - name: "___bidirectional_gru_0___fw_transform.w0" - input_size: 120 - output_size: 120 - } - } -} -layers { - name: "__bidirectional_gru_0___fw" - type: "gated_recurrent" - size: 40 - active_type: "tanh" - inputs { - input_layer_name: "__bidirectional_gru_0___fw_transform" - input_parameter_name: "___bidirectional_gru_0___fw.w0" - } - bias_parameter_name: "___bidirectional_gru_0___fw.wbias" - reversed: false - active_gate_type: "sigmoid" -} -layers { - name: "__bidirectional_gru_0___bw_transform" - type: "mixed" - size: 120 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___bidirectional_gru_0___bw_transform.w0" - proj_conf { - type: "fc" - name: "___bidirectional_gru_0___bw_transform.w0" - input_size: 120 - output_size: 120 - } - } -} -layers { - name: "__bidirectional_gru_0___bw" - type: "gated_recurrent" - size: 40 - active_type: "tanh" - inputs { - input_layer_name: "__bidirectional_gru_0___bw_transform" - input_parameter_name: "___bidirectional_gru_0___bw.w0" - } - bias_parameter_name: "___bidirectional_gru_0___bw.wbias" - reversed: true - active_gate_type: "sigmoid" -} -layers { - name: "__bidirectional_gru_0__" - type: "concat" - size: 80 - active_type: "" - inputs { - input_layer_name: "__bidirectional_gru_0___fw" - } - inputs { - input_layer_name: "__bidirectional_gru_0___bw" - } - height: 0 - width: 0 - depth: 1 -} -parameters { - name: "___bidirectional_gru_0___fw_transform.w0" - size: 14400 - initial_mean: 0.0 - initial_std: 0.0912870929175 - dims: 120 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___bidirectional_gru_0___fw.w0" - size: 4800 - initial_mean: 0.0 - initial_std: 0.158113883008 - dims: 40 - dims: 120 - initial_strategy: 0 - initial_smart: 
true -} -parameters { - name: "___bidirectional_gru_0___fw.wbias" - size: 120 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 120 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___bidirectional_gru_0___bw_transform.w0" - size: 14400 - initial_mean: 0.0 - initial_std: 0.0912870929175 - dims: 120 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___bidirectional_gru_0___bw.w0" - size: 4800 - initial_mean: 0.0 - initial_std: 0.158113883008 - dims: 40 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___bidirectional_gru_0___bw.wbias" - size: 120 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 120 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__bidirectional_gru_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__bidirectional_gru_0___fw_transform" - layer_names: "__bidirectional_gru_0___fw" - layer_names: "__bidirectional_gru_0___bw_transform" - layer_names: "__bidirectional_gru_0___bw" - layer_names: "__bidirectional_gru_0__" - input_layer_names: "data" - output_layer_names: "__bidirectional_gru_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr deleted file mode 100644 index 25ec632375..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr +++ /dev/null @@ -1,137 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2304 - active_type: "" -} -layers { - name: "__conv_0__" - type: "exconv" - size: 36864 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 1 - output_x: 48 - img_size: 48 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 48 - img_size_y: 48 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 48 - width: 48 -} -layers { - name: "__bilinear_interp_layer_0__" - type: "bilinear_interp" - size: 65536 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - bilinear_interp_conf { - image_conf { - channels: 16 - img_size: 48 - img_size_y: 48 - } - out_size_x: 64 - out_size_y: 64 - } - } - height: 64 - width: 64 -} -layers { - name: "__pool_0__" - type: "pool" - size: 16384 - active_type: "" - inputs { - input_layer_name: "__bilinear_interp_layer_0__" - pool_conf { - pool_type: "max-projection" - channels: 16 - size_x: 2 - stride: 2 - output_x: 32 - img_size: 64 - padding: 0 - size_y: 2 - stride_y: 2 - output_y: 32 - img_size_y: 64 - padding_y: 0 - } - } - height: 32 - width: 32 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 384 - active_type: "tanh" - inputs { - input_layer_name: "__pool_0__" - input_parameter_name: "___fc_layer_0__.w0" - } -} -parameters { - name: "___conv_0__.w0" - size: 144 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_0__.w0" - size: 6291456 - initial_mean: 0.0 - initial_std: 0.0078125 - dims: 16384 - dims: 384 - 
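# Size bookkeeping for the bilinear_interp fixture above, as a plain,
# runnable Python check: the 16-channel 48x48 conv output is upsampled to
# 64x64 and then max-pooled 2x2 with stride 2 down to 32x32, which is why
# the fc weight above has dims 16384 x 384 (and 16384 * 384 = 6291456).
channels, conv_hw, interp_hw = 16, 48, 64
assert channels * conv_hw ** 2 == 36864           # __conv_0__ size
assert channels * interp_hw ** 2 == 65536         # __bilinear_interp_layer_0__ size
assert channels * (interp_hw // 2) ** 2 == 16384  # __pool_0__ size, fc input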
initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__fc_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__conv_0__" - layer_names: "__bilinear_interp_layer_0__" - layer_names: "__pool_0__" - layer_names: "__fc_layer_0__" - input_layer_names: "data" - output_layer_names: "__fc_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr deleted file mode 100644 index 4b9578a0c0..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr +++ /dev/null @@ -1,31 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__clip_0__" - type: "clip" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - clip_conf { - min: -10 - max: 10 - } - } -} -input_layer_names: "input" -output_layer_names: "__clip_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__clip_0__" - input_layer_names: "input" - output_layer_names: "__clip_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr deleted file mode 100644 index 9fe2bc29d3..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_conv3d_layer.protostr +++ /dev/null @@ -1,132 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 36288 - active_type: "" - height: 48 - width: 42 - depth: 6 -} -layers { - name: "conv3d_1" - type: "conv3d" - size: 24192 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "_conv3d_1.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 3 - output_x: 21 - img_size: 42 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 24 - img_size_y: 48 - filter_size_z: 3 - padding_z: 1 - stride_z: 2 - output_z: 3 - img_size_z: 6 - } - } - bias_parameter_name: "_conv3d_1.wbias" - num_filters: 16 - shared_biases: true - height: 24 - width: 21 - depth: 3 -} -layers { - name: "conv3d_2" - type: "conv3d" - size: 24192 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "_conv3d_2.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 3 - output_x: 21 - img_size: 42 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 24 - img_size_y: 48 - filter_size_z: 3 - padding_z: 1 - stride_z: 2 - output_z: 3 - img_size_z: 6 - } - } - bias_parameter_name: "_conv3d_2.wbias" - num_filters: 16 - shared_biases: true - height: 24 - width: 21 - depth: 3 -} -parameters { - name: "_conv3d_1.w0" - size: 1296 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_conv3d_1.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_conv3d_2.w0" - size: 1296 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_conv3d_2.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} 
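# How the conv3d sizes above come about, as a plain, runnable Python
# check. With caffe_mode the per-axis output is
# floor((img + 2 * pad - filter) / stride) + 1:
def conv_out(img, flt, pad, stride):
    return (img + 2 * pad - flt) // stride + 1

ox = conv_out(42, 3, 1, 2)   # output_x: 21
oy = conv_out(48, 3, 1, 2)   # output_y: 24
oz = conv_out(6, 3, 1, 2)    # output_z: 3
assert (ox, oy, oz) == (21, 24, 3)
assert 16 * ox * oy * oz == 24192   # num_filters * volume = layer size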
-input_layer_names: "data" -output_layer_names: "conv3d_2" -sub_models { - name: "root" - layer_names: "data" - layer_names: "conv3d_1" - layer_names: "conv3d_2" - input_layer_names: "data" - output_layer_names: "conv3d_2" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr deleted file mode 100644 index 55ab464ddf..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ /dev/null @@ -1,375 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "labels" - type: "data" - size: 5000 - active_type: "" -} -layers { - name: "probs" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "xe-label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 4 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__ctc_layer_0__" - type: "ctc" - size: 5001 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "labels" - } - norm_by_times: false -} -layers { - name: "__warp_ctc_layer_0__" - type: "warp_ctc" - size: 5001 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "labels" - } - norm_by_times: false - blank: 0 -} -layers { - name: "crf_label" - type: "data" - size: 4 - active_type: "" -} -layers { - name: "__crf_layer_0__" - type: "crf" - size: 4 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___crf_layer_0__.w0" - } - inputs { - input_layer_name: "crf_label" - } - coeff: 1.0 -} -layers { - name: "left" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "right" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__rank_cost_0__" - type: "rank-cost" - size: 1 - active_type: "" - inputs { - input_layer_name: "left" - } - inputs { - input_layer_name: "right" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -layers { - name: "list_feature" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "list_scores" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__lambda_cost_0__" - type: "lambda_cost" - size: 1 - active_type: "" - inputs { - input_layer_name: "list_feature" - } - inputs { - input_layer_name: "list_scores" - } - NDCG_num: 5 - max_sort_size: -1 -} -layers { - name: "__cross_entropy_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "probs" - } - inputs { - input_layer_name: "xe-label" - } - coeff: 1.0 -} -layers { - name: "__cross_entropy_with_selfnorm_0__" - type: "multi_class_cross_entropy_with_selfnorm" - active_type: "" - inputs { - input_layer_name: "probs" - } - inputs { - input_layer_name: "xe-label" - } - softmax_selfnorm_alpha: 0.1 - coeff: 1.0 -} -layers { - name: "__huber_regression_cost_0__" - type: "huber_regression" - size: 1 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "labels" - } - coeff: 1.0 - delta: 1.0 -} -layers { - name: "huber_probs" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "huber_label" - type: "data" - size: 1 - active_type: "" -} -layers 
{ - name: "__huber_classification_cost_0__" - type: "huber_classification" - size: 1 - active_type: "" - inputs { - input_layer_name: "huber_probs" - } - inputs { - input_layer_name: "huber_label" - } - coeff: 1.0 -} -layers { - name: "__multi_binary_label_cross_entropy_0__" - type: "multi_binary_label_cross_entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "probs" - } - inputs { - input_layer_name: "xe-label" - } - coeff: 1.0 -} -layers { - name: "__sum_cost_0__" - type: "sum_cost" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - coeff: 1.0 -} -layers { - name: "__nce_layer_0__" - type: "nce" - size: 1 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___nce_layer_0__.w0" - } - inputs { - input_layer_name: "labels" - } - bias_parameter_name: "___nce_layer_0__.wbias" - num_classes: 5000 - num_neg_samples: 10 -} -parameters { - name: "___fc_layer_0__.w0" - size: 800 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 4 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 4 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___crf_layer_0__.w0" - size: 24 - initial_mean: 0.0 - initial_std: 0.408248290464 - dims: 6 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___nce_layer_0__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.0141421356237 - dims: 5000 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___nce_layer_0__.wbias" - size: 5000 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 5000 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -input_layer_names: "labels" -input_layer_names: "crf_label" -input_layer_names: "left" -input_layer_names: "right" -input_layer_names: "label" -input_layer_names: "list_feature" -input_layer_names: "list_scores" -input_layer_names: "probs" -input_layer_names: "xe-label" -input_layer_names: "huber_probs" -input_layer_names: "huber_label" -output_layer_names: "__ctc_layer_0__" -output_layer_names: "__warp_ctc_layer_0__" -output_layer_names: "__crf_layer_0__" -output_layer_names: "__rank_cost_0__" -output_layer_names: "__lambda_cost_0__" -output_layer_names: "__cross_entropy_0__" -output_layer_names: "__cross_entropy_with_selfnorm_0__" -output_layer_names: "__huber_regression_cost_0__" -output_layer_names: "__huber_classification_cost_0__" -output_layer_names: "__multi_binary_label_cross_entropy_0__" -output_layer_names: "__sum_cost_0__" -output_layer_names: "__nce_layer_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "labels" - layer_names: "probs" - layer_names: "xe-label" - layer_names: "__fc_layer_0__" - layer_names: "__ctc_layer_0__" - layer_names: "__warp_ctc_layer_0__" - layer_names: "crf_label" - layer_names: "__crf_layer_0__" - layer_names: "left" - layer_names: "right" - layer_names: "label" - layer_names: "__rank_cost_0__" - layer_names: "list_feature" - layer_names: "list_scores" - layer_names: "__lambda_cost_0__" - layer_names: "__cross_entropy_0__" - layer_names: "__cross_entropy_with_selfnorm_0__" - layer_names: "__huber_regression_cost_0__" - layer_names: "huber_probs" - layer_names: "huber_label" - layer_names: "__huber_classification_cost_0__" - layer_names: "__multi_binary_label_cross_entropy_0__" - layer_names: "__sum_cost_0__" - layer_names: "__nce_layer_0__" - 
input_layer_names: "input" - input_layer_names: "labels" - input_layer_names: "crf_label" - input_layer_names: "left" - input_layer_names: "right" - input_layer_names: "label" - input_layer_names: "list_feature" - input_layer_names: "list_scores" - input_layer_names: "probs" - input_layer_names: "xe-label" - input_layer_names: "huber_probs" - input_layer_names: "huber_label" - output_layer_names: "__ctc_layer_0__" - output_layer_names: "__warp_ctc_layer_0__" - output_layer_names: "__crf_layer_0__" - output_layer_names: "__rank_cost_0__" - output_layer_names: "__lambda_cost_0__" - output_layer_names: "__cross_entropy_0__" - output_layer_names: "__cross_entropy_with_selfnorm_0__" - output_layer_names: "__huber_regression_cost_0__" - output_layer_names: "__huber_classification_cost_0__" - output_layer_names: "__multi_binary_label_cross_entropy_0__" - output_layer_names: "__sum_cost_0__" - output_layer_names: "__nce_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr deleted file mode 100644 index cec8a73db6..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers_with_weight.protostr +++ /dev/null @@ -1,162 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "weight" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__cost_0__" - type: "multi-class-cross-entropy" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - inputs { - input_layer_name: "weight" - } - coeff: 1.0 -} -layers { - name: "__square_error_cost_0__" - type: "square_error" - size: 1 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "label" - } - inputs { - input_layer_name: "weight" - } - coeff: 1.0 -} -layers { - name: "multi_class_label" - type: "data" - size: 500 - active_type: "" -} -layers { - name: "__nce_layer_0__" - type: "nce" - size: 1 - active_type: "sigmoid" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___nce_layer_0__.w0" - } - inputs { - input_layer_name: "multi_class_label" - } - inputs { - input_layer_name: "weight" - } - bias_parameter_name: "___nce_layer_0__.wbias" - num_classes: 500 - num_neg_samples: 10 -} -parameters { - name: "___fc_layer_0__.w0" - size: 3000 - initial_mean: 0.0 - initial_std: 0.057735026919 - dims: 300 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 10 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 10 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___nce_layer_0__.w0" - size: 5000 - initial_mean: 0.0 - initial_std: 0.04472135955 - dims: 500 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___nce_layer_0__.wbias" - size: 500 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 500 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -input_layer_names: "label" -input_layer_names: 
"weight" -input_layer_names: "multi_class_label" -output_layer_names: "__cost_0__" -output_layer_names: "__square_error_cost_0__" -output_layer_names: "__nce_layer_0__" -evaluators { - name: "classification_error_evaluator" - type: "classification_error" - input_layers: "__fc_layer_0__" - input_layers: "label" - input_layers: "weight" -} -sub_models { - name: "root" - layer_names: "input" - layer_names: "label" - layer_names: "weight" - layer_names: "__fc_layer_0__" - layer_names: "__cost_0__" - layer_names: "__square_error_cost_0__" - layer_names: "multi_class_label" - layer_names: "__nce_layer_0__" - input_layer_names: "input" - input_layer_names: "label" - input_layer_names: "weight" - input_layer_names: "multi_class_label" - output_layer_names: "__cost_0__" - output_layer_names: "__square_error_cost_0__" - output_layer_names: "__nce_layer_0__" - evaluator_names: "classification_error_evaluator" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr deleted file mode 100644 index a602569697..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr +++ /dev/null @@ -1,207 +0,0 @@ -type: "nn" -layers { - name: "sentence_states" - type: "data" - size: 32 - active_type: "" -} -layers { - name: "sentence_scores" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__kmax_seq_score_layer_0__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "sentence_scores" - } - beam_size: 5 -} -layers { - name: "__sub_nested_seq_layer_0__" - type: "sub_nested_seq" - size: 32 - active_type: "" - inputs { - input_layer_name: "sentence_states" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_0__" - } -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 1 - active_type: "" - inputs { - input_layer_name: "__sub_nested_seq_layer_0__" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__kmax_seq_score_layer_1__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "sentence_scores" - } - beam_size: 5 -} -layers { - name: "__seq_slice_layer_0__" - type: "seq_slice" - size: 32 - active_type: "" - inputs { - input_layer_name: "__sub_nested_seq_layer_0__" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_1__" - } - select_first: true -} -layers { - name: "__fc_layer_1__" - type: "fc" - size: 1 - active_type: "" - inputs { - input_layer_name: "__seq_slice_layer_0__" - input_parameter_name: "___fc_layer_1__.w0" - } - bias_parameter_name: "___fc_layer_1__.wbias" -} -layers { - name: "__kmax_seq_score_layer_2__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "__fc_layer_1__" - } - beam_size: 5 -} -layers { - name: "sentences_ids" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "start_ids" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "end_ids" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__cross_entropy_over_beam_0__" - type: "cross_entropy_over_beam" - active_type: "" - inputs { - input_layer_name: "sentence_scores" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_0__" - } - inputs { - input_layer_name: "sentences_ids" - } - inputs { - input_layer_name: "__fc_layer_0__" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_1__" - 
} - inputs { - input_layer_name: "start_ids" - } - inputs { - input_layer_name: "__fc_layer_1__" - } - inputs { - input_layer_name: "__kmax_seq_score_layer_2__" - } - inputs { - input_layer_name: "end_ids" - } -} -parameters { - name: "___fc_layer_0__.w0" - size: 32 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_1__.w0" - size: 32 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_1__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "sentence_scores" -input_layer_names: "sentences_ids" -input_layer_names: "sentence_states" -input_layer_names: "start_ids" -input_layer_names: "end_ids" -output_layer_names: "__cross_entropy_over_beam_0__" -sub_models { - name: "root" - layer_names: "sentence_states" - layer_names: "sentence_scores" - layer_names: "__kmax_seq_score_layer_0__" - layer_names: "__sub_nested_seq_layer_0__" - layer_names: "__fc_layer_0__" - layer_names: "__kmax_seq_score_layer_1__" - layer_names: "__seq_slice_layer_0__" - layer_names: "__fc_layer_1__" - layer_names: "__kmax_seq_score_layer_2__" - layer_names: "sentences_ids" - layer_names: "start_ids" - layer_names: "end_ids" - layer_names: "__cross_entropy_over_beam_0__" - input_layer_names: "sentence_scores" - input_layer_names: "sentences_ids" - input_layer_names: "sentence_states" - input_layer_names: "start_ids" - input_layer_names: "end_ids" - output_layer_names: "__cross_entropy_over_beam_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr deleted file mode 100644 index 7bf409731c..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_deconv3d_layer.protostr +++ /dev/null @@ -1,132 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 36288 - active_type: "" - height: 48 - width: 42 - depth: 6 -} -layers { - name: "deconv3d_1" - type: "deconv3d" - size: 1387760 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "_deconv3d_1.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 16 - output_x: 42 - img_size: 83 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 48 - img_size_y: 95 - filter_size_z: 3 - padding_z: 1 - stride_z: 2 - output_z: 6 - img_size_z: 11 - } - } - bias_parameter_name: "_deconv3d_1.wbias" - num_filters: 16 - shared_biases: true - height: 95 - width: 83 - depth: 11 -} -layers { - name: "deconv3d_2" - type: "deconv3d" - size: 1387760 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "_deconv3d_2.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 2 - padding: 1 - groups: 1 - filter_channels: 16 - output_x: 42 - img_size: 83 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 2 - output_y: 48 - img_size_y: 95 - filter_size_z: 3 - padding_z: 1 - stride_z: 2 - output_z: 6 - img_size_z: 11 - } - } - bias_parameter_name: "_deconv3d_2.wbias" - num_filters: 16 - shared_biases: true 
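# The deconv3d fixture around this point inverts the conv3d size formula:
# per axis, img_size = (output - 1) * stride - 2 * pad + filter. A plain,
# runnable Python check of the numbers in the layer definitions:
def deconv_out(inp, flt, pad, stride):
    return (inp - 1) * stride - 2 * pad + flt

x = deconv_out(42, 3, 1, 2)   # img_size: 83
y = deconv_out(48, 3, 1, 2)   # img_size_y: 95
z = deconv_out(6, 3, 1, 2)    # img_size_z: 11
assert (x, y, z) == (83, 95, 11)
assert 16 * x * y * z == 1387760   # num_filters * volume = layer size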
- height: 95 - width: 83 - depth: 11 -} -parameters { - name: "_deconv3d_1.w0" - size: 6912 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_deconv3d_1.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_deconv3d_2.w0" - size: 6912 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_deconv3d_2.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "deconv3d_2" -sub_models { - name: "root" - layer_names: "data" - layer_names: "deconv3d_1" - layer_names: "deconv3d_2" - input_layer_names: "data" - output_layer_names: "deconv3d_2" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr deleted file mode 100644 index 6690f9852a..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr +++ /dev/null @@ -1,66 +0,0 @@ -type: "nn" -layers { - name: "input_loc" - type: "data" - size: 16 - active_type: "" - height: 16 - width: 1 -} -layers { - name: "input_conf" - type: "data" - size: 8 - active_type: "" - height: 1 - width: 8 -} -layers { - name: "priorbox" - type: "data" - size: 32 - active_type: "" - height: 4 - width: 8 -} -layers { - name: "test_detection_output" - type: "detection_output" - size: 1400 - active_type: "" - inputs { - input_layer_name: "priorbox" - detection_output_conf { - num_classes: 21 - nms_threshold: 0.45 - nms_top_k: 400 - background_id: 0 - input_num: 1 - keep_top_k: 200 - confidence_threshold: 0.01 - } - } - inputs { - input_layer_name: "input_loc" - } - inputs { - input_layer_name: "input_conf" - } -} -input_layer_names: "priorbox" -input_layer_names: "input_loc" -input_layer_names: "input_conf" -output_layer_names: "test_detection_output" -sub_models { - name: "root" - layer_names: "input_loc" - layer_names: "input_conf" - layer_names: "priorbox" - layer_names: "test_detection_output" - input_layer_names: "priorbox" - input_layer_names: "input_loc" - input_layer_names: "input_conf" - output_layer_names: "test_detection_output" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr deleted file mode 100644 index f1530c382c..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr +++ /dev/null @@ -1,38 +0,0 @@ -type: "nn" -layers { - name: "vector1" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "vector2" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__dot_prod_layer_0__" - type: "dot_prod" - size: 1 - active_type: "" - inputs { - input_layer_name: "vector1" - } - inputs { - input_layer_name: "vector2" - } -} -input_layer_names: "vector1" -input_layer_names: "vector2" -output_layer_names: "__dot_prod_layer_0__" -sub_models { - name: "root" - layer_names: "vector1" - layer_names: "vector2" - layer_names: "__dot_prod_layer_0__" - input_layer_names: "vector1" - input_layer_names: "vector2" - output_layer_names: 
"__dot_prod_layer_0__" - is_recurrent_layer_group: false -} diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr deleted file mode 100644 index f4b3605226..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_expand_layer.protostr +++ /dev/null @@ -1,56 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data_seq" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__expand_layer_0__" - type: "expand" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - inputs { - input_layer_name: "data_seq" - } - trans_type: "seq" -} -layers { - name: "__expand_layer_1__" - type: "expand" - size: 30 - active_type: "" - inputs { - input_layer_name: "data" - } - inputs { - input_layer_name: "data_seq" - } - trans_type: "non-seq" -} -input_layer_names: "data" -input_layer_names: "data_seq" -output_layer_names: "__expand_layer_0__" -output_layer_names: "__expand_layer_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "data_seq" - layer_names: "__expand_layer_0__" - layer_names: "__expand_layer_1__" - input_layer_names: "data" - input_layer_names: "data_seq" - output_layer_names: "__expand_layer_0__" - output_layer_names: "__expand_layer_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr deleted file mode 100644 index 4f3002b199..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr +++ /dev/null @@ -1,39 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 1024 - active_type: "" -} -layers { - name: "__factorization_machine_0__" - type: "factorization_machine" - size: 1 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___factorization_machine_0__.w0" - } - factor_size: 10 -} -parameters { - name: "___factorization_machine_0__.w0" - size: 10240 - initial_mean: 0.0 - initial_std: 0.03125 - dims: 1024 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__factorization_machine_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__factorization_machine_0__" - input_layer_names: "data" - output_layer_names: "__factorization_machine_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr deleted file mode 100644 index 8151898832..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_fc.protostr +++ /dev/null @@ -1,98 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__trans_layer_0__" - type: "trans" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - } -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__trans_layer_0__" - input_parameter_name: "___fc_layer_0__.w0" - } -} -layers { - name: "mask" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__selective_fc_layer_0__" - type: "selective_fc" - size: 100 - active_type: 
"sigmoid" - inputs { - input_layer_name: "data" - input_parameter_name: "___selective_fc_layer_0__.w0" - } - inputs { - input_layer_name: "mask" - } - bias_parameter_name: "___selective_fc_layer_0__.wbias" - selective_fc_pass_generation: false - has_selected_colums: true - selective_fc_full_mul_ratio: 0.02 -} -parameters { - name: "___fc_layer_0__.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___selective_fc_layer_0__.w0" - size: 10000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - initial_strategy: 0 - initial_smart: true - is_sparse: false -} -parameters { - name: "___selective_fc_layer_0__.wbias" - size: 100 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 100 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -input_layer_names: "mask" -output_layer_names: "__fc_layer_0__" -output_layer_names: "__selective_fc_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__trans_layer_0__" - layer_names: "__fc_layer_0__" - layer_names: "mask" - layer_names: "__selective_fc_layer_0__" - input_layer_names: "data" - input_layer_names: "mask" - output_layer_names: "__fc_layer_0__" - output_layer_names: "__selective_fc_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr deleted file mode 100644 index f1e4d894a5..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr +++ /dev/null @@ -1,106 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 256 - active_type: "" -} -layers { - name: "__gated_unit_layer_0___input_proj" - type: "fc" - size: 512 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "___gated_unit_layer_0___input_proj.w0" - } - bias_parameter_name: "___gated_unit_layer_0___input_proj.wbias" - error_clipping_threshold: 100.0 -} -layers { - name: "__gated_unit_layer_0___gate" - type: "fc" - size: 512 - active_type: "sigmoid" - inputs { - input_layer_name: "input" - input_parameter_name: "___gated_unit_layer_0___gate.w0" - } - bias_parameter_name: "___gated_unit_layer_0___gate.wbias" - error_clipping_threshold: 100.0 -} -layers { - name: "__gated_unit_layer_0___gated_act" - type: "mixed" - size: 512 - active_type: "" - inputs { - input_layer_name: "__gated_unit_layer_0___input_proj" - } - inputs { - input_layer_name: "__gated_unit_layer_0___gate" - } - error_clipping_threshold: 100.0 - operator_confs { - type: "dot_mul" - input_indices: 0 - input_indices: 1 - input_sizes: 512 - input_sizes: 512 - output_size: 512 - dotmul_scale: 1 - } -} -parameters { - name: "___gated_unit_layer_0___input_proj.w0" - size: 131072 - initial_mean: 0.0 - initial_std: 0.0001 - dims: 256 - dims: 512 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gated_unit_layer_0___input_proj.wbias" - size: 512 - initial_mean: 0.0 - initial_std: 1 - dims: 1 - dims: 512 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gated_unit_layer_0___gate.w0" - size: 131072 - initial_mean: 0.0 - initial_std: 0.0001 - dims: 256 - dims: 512 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gated_unit_layer_0___gate.wbias" - size: 512 - initial_mean: 0.0 - initial_std: 1 - dims: 1 - dims: 512 - 
initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -output_layer_names: "__gated_unit_layer_0___gated_act" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__gated_unit_layer_0___input_proj" - layer_names: "__gated_unit_layer_0___gate" - layer_names: "__gated_unit_layer_0___gated_act" - input_layer_names: "input" - output_layer_names: "__gated_unit_layer_0___gated_act" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr deleted file mode 100644 index 2c19b2fd12..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_grumemory_layer.protostr +++ /dev/null @@ -1,51 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 120 - active_type: "" -} -layers { - name: "__gru_0__" - type: "gated_recurrent" - size: 40 - active_type: "sigmoid" - inputs { - input_layer_name: "data" - input_parameter_name: "___gru_0__.w0" - } - bias_parameter_name: "___gru_0__.wbias" - reversed: true - active_gate_type: "tanh" -} -parameters { - name: "___gru_0__.w0" - size: 4800 - initial_mean: 0.0 - initial_std: 0.158113883008 - dims: 40 - dims: 120 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_0__.wbias" - size: 120 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 120 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__gru_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__gru_0__" - input_layer_names: "data" - output_layer_names: "__gru_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr deleted file mode 100644 index e81fcb13c4..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_hsigmoid.protostr +++ /dev/null @@ -1,62 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__hsigmoid_0__" - type: "hsigmoid" - size: 1 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___hsigmoid_0__.w0" - } - inputs { - input_layer_name: "label" - } - bias_parameter_name: "___hsigmoid_0__.wbias" - num_classes: 10 -} -parameters { - name: "___hsigmoid_0__.w0" - size: 900 - initial_mean: 0.0 - initial_std: 0.333333333333 - dims: 9 - dims: 100 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___hsigmoid_0__.wbias" - size: 9 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 9 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -input_layer_names: "label" -output_layer_names: "__hsigmoid_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "label" - layer_names: "__hsigmoid_0__" - input_layer_names: "data" - input_layer_names: "label" - output_layer_names: "__hsigmoid_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr deleted file mode 100644 index f93d368c86..0000000000 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr +++ /dev/null @@ -1,59 +0,0 @@ -type: "nn" -layers { - name: "input_seq" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 1 - active_type: "exponential" - inputs { - input_layer_name: "input_seq" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__kmax_seq_score_layer_0__" - type: "kmax_seq_score" - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - beam_size: 5 -} -parameters { - name: "___fc_layer_0__.w0" - size: 128 - initial_mean: 0.0 - initial_std: 0.0883883476483 - dims: 128 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input_seq" -output_layer_names: "__kmax_seq_score_layer_0__" -sub_models { - name: "root" - layer_names: "input_seq" - layer_names: "__fc_layer_0__" - layer_names: "__kmax_seq_score_layer_0__" - input_layer_names: "input_seq" - output_layer_names: "__kmax_seq_score_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr deleted file mode 100644 index 9ba33689ed..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr +++ /dev/null @@ -1,39 +0,0 @@ -type: "nn" -layers { - name: "x" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "y" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "__l2_distance_layer_0__" - type: "l2_distance" - size: 1 - active_type: "" - inputs { - input_layer_name: "x" - } - inputs { - input_layer_name: "y" - } -} -input_layer_names: "x" -input_layer_names: "y" -output_layer_names: "__l2_distance_layer_0__" -sub_models { - name: "root" - layer_names: "x" - layer_names: "y" - layer_names: "__l2_distance_layer_0__" - input_layer_names: "x" - input_layer_names: "y" - output_layer_names: "__l2_distance_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr deleted file mode 100644 index 76a4afab82..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_lstmemory_layer.protostr +++ /dev/null @@ -1,53 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "__lstmemory_0__" - type: "lstmemory" - size: 32 - active_type: "tanh" - inputs { - input_layer_name: "data" - input_parameter_name: "___lstmemory_0__.w0" - } - bias_parameter_name: "___lstmemory_0__.wbias" - reversed: true - active_gate_type: "tanh" - active_state_type: "tanh" -} -parameters { - name: "___lstmemory_0__.w0" - size: 4096 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 32 - dims: 4 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstmemory_0__.wbias" - size: 224 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 224 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__lstmemory_0__" -sub_models { - name: "root" - 
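# lstmemory sizing, as reflected in the fixture above: the input must
# already be a 4 * size projection (here the 128-wide data feeds a
# size-32 memory, 128 = 4 * 32), and the bias packs seven per-unit
# vectors (224 = 7 * 32), apparently four gate biases plus three peephole
# weights. A hedged sketch of the deleted test_lstmemory_layer.py:
from paddle.trainer_config_helpers import *

din = data_layer(name='data', size=128)
lstm = lstmemory(input=din, reverse=True, act=TanhActivation(),
                 gate_act=TanhActivation(), state_act=TanhActivation())
outputs(lstm)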
layer_names: "data" - layer_names: "__lstmemory_0__" - input_layer_names: "data" - output_layer_names: "__lstmemory_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr deleted file mode 100644 index 39dc487146..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr +++ /dev/null @@ -1,233 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2304 - active_type: "" - height: 48 - width: 48 -} -layers { - name: "__conv_0__" - type: "exconv" - size: 36864 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 1 - output_x: 48 - img_size: 48 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 48 - img_size_y: 48 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 48 - width: 48 -} -layers { - name: "__maxout_layer_0__" - type: "maxout" - size: 18432 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - maxout_conf { - image_conf { - channels: 16 - img_size: 48 - img_size_y: 48 - } - groups: 2 - } - } - height: 48 - width: 48 -} -layers { - name: "__pool_0__" - type: "pool" - size: 4608 - active_type: "" - inputs { - input_layer_name: "__maxout_layer_0__" - pool_conf { - pool_type: "max-projection" - channels: 8 - size_x: 2 - stride: 2 - output_x: 24 - img_size: 48 - padding: 0 - size_y: 2 - stride_y: 2 - output_y: 24 - img_size_y: 48 - padding_y: 0 - } - } - height: 24 - width: 24 -} -layers { - name: "__conv_1__" - type: "exconv" - size: 73728 - active_type: "" - inputs { - input_layer_name: "__pool_0__" - input_parameter_name: "___conv_1__.w0" - conv_conf { - filter_size: 3 - channels: 8 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 8 - output_x: 24 - img_size: 24 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 24 - img_size_y: 24 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_1__.wbias" - num_filters: 128 - shared_biases: true - height: 24 - width: 24 -} -layers { - name: "__maxout_layer_1__" - type: "maxout" - size: 18432 - active_type: "" - inputs { - input_layer_name: "__conv_1__" - maxout_conf { - image_conf { - channels: 128 - img_size: 24 - img_size_y: 24 - } - groups: 4 - } - } - height: 24 - width: 24 -} -layers { - name: "__block_expand_layer_0__" - type: "blockexpand" - size: 192 - active_type: "" - inputs { - input_layer_name: "__maxout_layer_1__" - block_expand_conf { - channels: 32 - stride_x: 1 - stride_y: 1 - padding_x: 0 - padding_y: 0 - block_x: 1 - block_y: 6 - output_x: 0 - output_y: 0 - img_size_x: 0 - img_size_y: 0 - } - } -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 384 - active_type: "tanh" - inputs { - input_layer_name: "__block_expand_layer_0__" - input_parameter_name: "___fc_layer_0__.w0" - } -} -parameters { - name: "___conv_0__.w0" - size: 144 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_1__.w0" - size: 9216 - initial_mean: 0.0 - initial_std: 0.166666666667 - 
initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_1__.wbias" - size: 128 - initial_mean: 0.0 - initial_std: 0.0 - dims: 128 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_0__.w0" - size: 73728 - initial_mean: 0.0 - initial_std: 0.0721687836487 - dims: 192 - dims: 384 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__fc_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__conv_0__" - layer_names: "__maxout_layer_0__" - layer_names: "__pool_0__" - layer_names: "__conv_1__" - layer_names: "__maxout_layer_1__" - layer_names: "__block_expand_layer_0__" - layer_names: "__fc_layer_0__" - input_layer_names: "data" - output_layer_names: "__fc_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr deleted file mode 100644 index 0ba84dcc6d..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr +++ /dev/null @@ -1,79 +0,0 @@ -type: "nn" -layers { - name: "input_loc" - type: "data" - size: 16 - active_type: "" - height: 16 - width: 1 -} -layers { - name: "input_conf" - type: "data" - size: 8 - active_type: "" - height: 1 - width: 8 -} -layers { - name: "priorbox" - type: "data" - size: 32 - active_type: "" - height: 4 - width: 8 -} -layers { - name: "label" - type: "data" - size: 24 - active_type: "" - height: 4 - width: 6 -} -layers { - name: "test_multibox_loss" - type: "multibox_loss" - size: 1 - active_type: "" - inputs { - input_layer_name: "priorbox" - multibox_loss_conf { - num_classes: 21 - overlap_threshold: 0.5 - neg_pos_ratio: 3.0 - neg_overlap: 0.5 - background_id: 0 - input_num: 1 - } - } - inputs { - input_layer_name: "label" - } - inputs { - input_layer_name: "input_loc" - } - inputs { - input_layer_name: "input_conf" - } -} -input_layer_names: "priorbox" -input_layer_names: "label" -input_layer_names: "input_loc" -input_layer_names: "input_conf" -output_layer_names: "test_multibox_loss" -sub_models { - name: "root" - layer_names: "input_loc" - layer_names: "input_conf" - layer_names: "priorbox" - layer_names: "label" - layer_names: "test_multibox_loss" - input_layer_names: "priorbox" - input_layer_names: "label" - input_layer_names: "input_loc" - input_layer_names: "input_conf" - output_layer_names: "test_multibox_loss" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multiplex_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multiplex_layer.protostr deleted file mode 100644 index 379842ba8d..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multiplex_layer.protostr +++ /dev/null @@ -1,63 +0,0 @@ -type: "nn" -layers { - name: "index" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "data1" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data2" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data3" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__multiplex_layer_0__" - type: "multiplex" - size: 30 - active_type: "" - inputs { - input_layer_name: "index" - } - inputs { - input_layer_name: "data1" - } - inputs { - input_layer_name: "data2" - } - inputs { - input_layer_name: "data3" - } -} 
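# The multiplex layer just defined picks, row by row, one of the three
# 30-wide data inputs according to the integer index input. A hedged
# sketch of the deleted test_multiplex_layer.py (the convention that the
# index layer comes first in the input list is assumed from the legacy
# layers.py):
from paddle.trainer_config_helpers import *

index = data_layer(name='index', size=1)
d1 = data_layer(name='data1', size=30)
d2 = data_layer(name='data2', size=30)
d3 = data_layer(name='data3', size=30)
outputs(multiplex_layer(input=[index, d1, d2, d3]))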
-input_layer_names: "index" -input_layer_names: "data1" -input_layer_names: "data2" -input_layer_names: "data3" -output_layer_names: "__multiplex_layer_0__" -sub_models { - name: "root" - layer_names: "index" - layer_names: "data1" - layer_names: "data2" - layer_names: "data3" - layer_names: "__multiplex_layer_0__" - input_layer_names: "index" - input_layer_names: "data1" - input_layer_names: "data2" - input_layer_names: "data3" - output_layer_names: "__multiplex_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr deleted file mode 100644 index c1bfdf1b19..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr +++ /dev/null @@ -1,225 +0,0 @@ -type: "nn" -layers { - name: "w" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "a" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "b" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "c" - type: "data" - size: 200 - active_type: "" -} -layers { - name: "d" - type: "data" - size: 31 - active_type: "" -} -layers { - name: "__interpolation_layer_0__" - type: "interpolation" - size: 100 - active_type: "" - inputs { - input_layer_name: "w" - } - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } -} -layers { - name: "__power_layer_0__" - type: "power" - size: 100 - active_type: "" - inputs { - input_layer_name: "w" - } - inputs { - input_layer_name: "a" - } -} -layers { - name: "__scaling_layer_0__" - type: "scaling" - size: 100 - active_type: "" - inputs { - input_layer_name: "w" - } - inputs { - input_layer_name: "a" - } -} -layers { - name: "__cos_sim_0__" - type: "cos" - size: 1 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } - cos_scale: 1 -} -layers { - name: "__cos_sim_1__" - type: "cos_vm" - size: 2 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "c" - } - cos_scale: 1 -} -layers { - name: "__sum_to_one_norm_layer_0__" - type: "sum_to_one_norm" - size: 100 - active_type: "" - inputs { - input_layer_name: "a" - } -} -layers { - name: "__conv_shift_layer_0__" - type: "conv_shift" - size: 100 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "d" - } -} -layers { - name: "__tensor_layer_0__" - type: "tensor" - size: 1000 - active_type: "" - inputs { - input_layer_name: "a" - input_parameter_name: "___tensor_layer_0__.w0" - } - inputs { - input_layer_name: "b" - } - bias_parameter_name: "___tensor_layer_0__.wbias" -} -layers { - name: "__slope_intercept_layer_0__" - type: "slope_intercept" - size: 100 - active_type: "" - inputs { - input_layer_name: "a" - } - slope: 0.7 - intercept: 0.9 -} -layers { - name: "__linear_comb_layer_0__" - type: "convex_comb" - size: 2 - active_type: "" - inputs { - input_layer_name: "b" - } - inputs { - input_layer_name: "c" - } -} -parameters { - name: "___tensor_layer_0__.w0" - size: 10000000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 100 - dims: 1000 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___tensor_layer_0__.wbias" - size: 1000 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1000 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "w" -input_layer_names: "a" -input_layer_names: "b" -input_layer_names: "c" 
-input_layer_names: "d" -output_layer_names: "__interpolation_layer_0__" -output_layer_names: "__power_layer_0__" -output_layer_names: "__scaling_layer_0__" -output_layer_names: "__cos_sim_0__" -output_layer_names: "__cos_sim_1__" -output_layer_names: "__sum_to_one_norm_layer_0__" -output_layer_names: "__conv_shift_layer_0__" -output_layer_names: "__tensor_layer_0__" -output_layer_names: "__slope_intercept_layer_0__" -output_layer_names: "__linear_comb_layer_0__" -sub_models { - name: "root" - layer_names: "w" - layer_names: "a" - layer_names: "b" - layer_names: "c" - layer_names: "d" - layer_names: "__interpolation_layer_0__" - layer_names: "__power_layer_0__" - layer_names: "__scaling_layer_0__" - layer_names: "__cos_sim_0__" - layer_names: "__cos_sim_1__" - layer_names: "__sum_to_one_norm_layer_0__" - layer_names: "__conv_shift_layer_0__" - layer_names: "__tensor_layer_0__" - layer_names: "__slope_intercept_layer_0__" - layer_names: "__linear_comb_layer_0__" - input_layer_names: "w" - input_layer_names: "a" - input_layer_names: "b" - input_layer_names: "c" - input_layer_names: "d" - output_layer_names: "__interpolation_layer_0__" - output_layer_names: "__power_layer_0__" - output_layer_names: "__scaling_layer_0__" - output_layer_names: "__cos_sim_0__" - output_layer_names: "__cos_sim_1__" - output_layer_names: "__sum_to_one_norm_layer_0__" - output_layer_names: "__conv_shift_layer_0__" - output_layer_names: "__tensor_layer_0__" - output_layer_names: "__slope_intercept_layer_0__" - output_layer_names: "__linear_comb_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr deleted file mode 100644 index d5d6d31a17..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr +++ /dev/null @@ -1,122 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2016 - active_type: "" - height: 48 - width: 42 -} -layers { - name: "__conv_0__" - type: "exconv" - size: 32256 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 1 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 1 - output_x: 42 - img_size: 42 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 48 - img_size_y: 48 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 48 - width: 42 -} -layers { - name: "__pool_0__" - type: "pool" - size: 8064 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - pool_conf { - pool_type: "max-projection" - channels: 16 - size_x: 2 - stride: 2 - output_x: 21 - img_size: 42 - padding: 0 - size_y: 2 - stride_y: 2 - output_y: 24 - img_size_y: 48 - padding_y: 0 - } - } - height: 24 - width: 21 -} -layers { - name: "__pad_0__" - type: "pad" - size: 14175 - active_type: "" - inputs { - input_layer_name: "__pool_0__" - pad_conf { - image_conf { - channels: 16 - img_size: 21 - img_size_y: 24 - } - pad_c: 2 - pad_c: 3 - pad_h: 1 - pad_h: 2 - pad_w: 3 - pad_w: 1 - } - } - height: 27 - width: 25 -} -parameters { - name: "___conv_0__.w0" - size: 144 - initial_mean: 0.0 - initial_std: 0.471404520791 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} 
-input_layer_names: "data" -output_layer_names: "__pad_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__conv_0__" - layer_names: "__pool_0__" - layer_names: "__pad_0__" - input_layer_names: "data" - output_layer_names: "__pad_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr deleted file mode 100644 index 8eb98593f6..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr +++ /dev/null @@ -1,123 +0,0 @@ -type: "nn" -layers { - name: "data_2d" - type: "data" - size: 6000 - active_type: "" - height: 20 - width: 10 -} -layers { - name: "pool___2d" - type: "pool" - size: 840 - active_type: "" - inputs { - input_layer_name: "data_2d" - pool_conf { - pool_type: "avg-projection" - channels: 30 - size_x: 5 - stride: 3 - output_x: 4 - img_size: 10 - padding: 1 - size_y: 5 - stride_y: 3 - output_y: 7 - img_size_y: 20 - padding_y: 1 - } - } - height: 7 - width: 4 -} -layers { - name: "data_3d_1" - type: "data" - size: 60000 - active_type: "" - height: 20 - width: 10 - depth: 10 -} -layers { - name: "pool_3d_1" - type: "pool3d" - size: 3360 - active_type: "" - inputs { - input_layer_name: "data_3d_1" - pool_conf { - pool_type: "avg-projection" - channels: 30 - size_x: 5 - stride: 3 - output_x: 4 - img_size: 10 - padding: 1 - size_y: 5 - stride_y: 3 - output_y: 7 - img_size_y: 20 - padding_y: 1 - size_z: 5 - stride_z: 3 - output_z: 4 - img_size_z: 10 - padding_z: 1 - } - } - height: 7 - width: 4 - depth: 4 -} -layers { - name: "pool_3d_2" - type: "pool3d" - size: 3360 - active_type: "" - inputs { - input_layer_name: "data_3d_1" - pool_conf { - pool_type: "max-projection" - channels: 30 - size_x: 5 - stride: 3 - output_x: 4 - img_size: 10 - padding: 1 - size_y: 5 - stride_y: 3 - output_y: 7 - img_size_y: 20 - padding_y: 1 - size_z: 5 - stride_z: 3 - output_z: 4 - img_size_z: 10 - padding_z: 1 - } - } - height: 7 - width: 4 - depth: 4 -} -input_layer_names: "data_2d" -output_layer_names: "pool___2d" -output_layer_names: "pool_3d_1" -output_layer_names: "pool_3d_2" -sub_models { - name: "root" - layer_names: "data_2d" - layer_names: "pool___2d" - layer_names: "data_3d_1" - layer_names: "pool_3d_1" - layer_names: "pool_3d_2" - input_layer_names: "data_2d" - output_layer_names: "pool___2d" - output_layer_names: "pool_3d_1" - output_layer_names: "pool_3d_2" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr deleted file mode 100644 index 63fb38c650..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr +++ /dev/null @@ -1,144 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" - height: 10 - width: 10 -} -layers { - name: "__prelu_layer_0__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_0__.w0" - } - partial_sum: 1 - height: 10 - width: 10 - depth: 1 -} -layers { - name: "__prelu_layer_1__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_1__.w0" - } - partial_sum: 1 - height: 10 - width: 10 - depth: 1 -} -layers { - name: 
"__prelu_layer_2__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_2__.w0" - } - partial_sum: 5 - height: 10 - width: 10 - depth: 1 -} -layers { - name: "__prelu_layer_3__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_3__.w0" - } - partial_sum: 300 - height: 10 - width: 10 - depth: 1 -} -layers { - name: "__prelu_layer_4__" - type: "prelu" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - input_parameter_name: "___prelu_layer_4__.w0" - } - partial_sum: 100 - height: 10 - width: 10 - depth: 1 -} -parameters { - name: "___prelu_layer_0__.w0" - size: 300 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_1__.w0" - size: 300 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_2__.w0" - size: 60 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 60 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_3__.w0" - size: 1 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___prelu_layer_4__.w0" - size: 3 - initial_mean: 0.25 - initial_std: 0.0 - dims: 1 - dims: 3 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "input" -output_layer_names: "__prelu_layer_4__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__prelu_layer_0__" - layer_names: "__prelu_layer_1__" - layer_names: "__prelu_layer_2__" - layer_names: "__prelu_layer_3__" - layer_names: "__prelu_layer_4__" - input_layer_names: "input" - output_layer_names: "__prelu_layer_4__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr deleted file mode 100644 index f4cc492dfb..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_print_layer.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__print_0__" - type: "print" - active_type: "" - inputs { - input_layer_name: "input" - } - user_arg: "layer=input %s" -} -input_layer_names: "input" -output_layer_names: "input" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__print_0__" - input_layer_names: "input" - output_layer_names: "input" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr deleted file mode 100644 index 046037936a..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr +++ /dev/null @@ -1,593 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__addto_0__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - } - inputs { - input_layer_name: "data" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_1__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_0__" - } - inputs { - 
input_layer_name: "__addto_0__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_2__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_1__" - } - inputs { - input_layer_name: "__addto_1__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_3__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_2__" - } - inputs { - input_layer_name: "__addto_2__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_4__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_3__" - } - inputs { - input_layer_name: "__addto_3__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_5__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_4__" - } - inputs { - input_layer_name: "__addto_4__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_6__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_5__" - } - inputs { - input_layer_name: "__addto_5__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_7__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_6__" - } - inputs { - input_layer_name: "__addto_6__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_8__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_7__" - } - inputs { - input_layer_name: "__addto_7__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_9__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_8__" - } - inputs { - input_layer_name: "__addto_8__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_10__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_9__" - } - inputs { - input_layer_name: "__addto_9__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_11__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_10__" - } - inputs { - input_layer_name: "__addto_10__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_12__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_11__" - } - inputs { - input_layer_name: "__addto_11__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_13__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_12__" - } - inputs { - input_layer_name: "__addto_12__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_14__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_13__" - } - inputs { - input_layer_name: "__addto_13__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_15__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_14__" - } - inputs { - input_layer_name: "__addto_14__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_16__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_15__" - } - inputs { - input_layer_name: "__addto_15__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_17__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_16__" - } - inputs { - input_layer_name: "__addto_16__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_18__" - type: "addto" - 
size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_17__" - } - inputs { - input_layer_name: "__addto_17__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_19__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_18__" - } - inputs { - input_layer_name: "__addto_18__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_20__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_19__" - } - inputs { - input_layer_name: "__addto_19__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_21__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_20__" - } - inputs { - input_layer_name: "__addto_20__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_22__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_21__" - } - inputs { - input_layer_name: "__addto_21__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_23__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_22__" - } - inputs { - input_layer_name: "__addto_22__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_24__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_23__" - } - inputs { - input_layer_name: "__addto_23__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_25__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_24__" - } - inputs { - input_layer_name: "__addto_24__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_26__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_25__" - } - inputs { - input_layer_name: "__addto_25__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_27__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_26__" - } - inputs { - input_layer_name: "__addto_26__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_28__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_27__" - } - inputs { - input_layer_name: "__addto_27__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_29__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_28__" - } - inputs { - input_layer_name: "__addto_28__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_30__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_29__" - } - inputs { - input_layer_name: "__addto_29__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__addto_31__" - type: "addto" - size: 100 - active_type: "" - inputs { - input_layer_name: "__addto_30__" - } - inputs { - input_layer_name: "__addto_30__" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__fc_layer_0__" - type: "fc" - size: 32 - active_type: "relu" - inputs { - input_layer_name: "__addto_31__" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} -layers { - name: "__fc_layer_1__" - type: "fc" - size: 10 - active_type: "softmax" - inputs { - input_layer_name: "__fc_layer_0__" - input_parameter_name: "___fc_layer_1__.w0" - } - bias_parameter_name: "___fc_layer_1__.wbias" -} -parameters { - name: "___fc_layer_0__.w0" - size: 3200 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 32 - 
initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 32 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 32 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_1__.w0" - size: 320 - initial_mean: 0.0 - initial_std: 0.176776695297 - dims: 32 - dims: 10 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_1__.wbias" - size: 10 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 10 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__fc_layer_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__addto_0__" - layer_names: "__addto_1__" - layer_names: "__addto_2__" - layer_names: "__addto_3__" - layer_names: "__addto_4__" - layer_names: "__addto_5__" - layer_names: "__addto_6__" - layer_names: "__addto_7__" - layer_names: "__addto_8__" - layer_names: "__addto_9__" - layer_names: "__addto_10__" - layer_names: "__addto_11__" - layer_names: "__addto_12__" - layer_names: "__addto_13__" - layer_names: "__addto_14__" - layer_names: "__addto_15__" - layer_names: "__addto_16__" - layer_names: "__addto_17__" - layer_names: "__addto_18__" - layer_names: "__addto_19__" - layer_names: "__addto_20__" - layer_names: "__addto_21__" - layer_names: "__addto_22__" - layer_names: "__addto_23__" - layer_names: "__addto_24__" - layer_names: "__addto_25__" - layer_names: "__addto_26__" - layer_names: "__addto_27__" - layer_names: "__addto_28__" - layer_names: "__addto_29__" - layer_names: "__addto_30__" - layer_names: "__addto_31__" - layer_names: "__fc_layer_0__" - layer_names: "__fc_layer_1__" - input_layer_names: "data" - output_layer_names: "__fc_layer_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr deleted file mode 100644 index e012386ff9..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr +++ /dev/null @@ -1,42 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__repeat_layer_0__" - type: "featmap_expand" - size: 300 - active_type: "" - inputs { - input_layer_name: "data" - } - num_filters: 10 -} -layers { - name: "__repeat_layer_1__" - type: "featmap_expand" - size: 300 - active_type: "tanh" - inputs { - input_layer_name: "data" - } - num_filters: 10 - user_arg: "as_col_vec" -} -input_layer_names: "data" -output_layer_names: "__repeat_layer_0__" -output_layer_names: "__repeat_layer_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__repeat_layer_0__" - layer_names: "__repeat_layer_1__" - input_layer_names: "data" - output_layer_names: "__repeat_layer_0__" - output_layer_names: "__repeat_layer_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_resize_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_resize_layer.protostr deleted file mode 100644 index 9399252b23..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_resize_layer.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__resize_0__" - type: "resize" - size: 150 - active_type: "" - inputs { - input_layer_name: "input" - } -} 
-input_layer_names: "input" -output_layer_names: "__resize_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__resize_0__" - input_layer_names: "input" - output_layer_names: "__resize_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr deleted file mode 100644 index 711785be37..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr +++ /dev/null @@ -1,738 +0,0 @@ -type: "recurrent_nn" -layers { - name: "seq_input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "sub_seq_input" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 1 - active_type: "" -} -layers { - name: "__mixed_0__" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "seq_input" - input_parameter_name: "___mixed_0__.w0" - proj_conf { - type: "fc" - name: "___mixed_0__.w0" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__mixed_1__" - type: "mixed" - size: 300 - active_type: "" - inputs { - input_layer_name: "seq_input" - input_parameter_name: "___mixed_1__.w0" - proj_conf { - type: "fc" - name: "___mixed_1__.w0" - input_size: 100 - output_size: 300 - } - } -} -layers { - name: "__recurrent_group_0__" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "seq_input@__recurrent_group_0__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "rnn_forward+delay1@__recurrent_group_0__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "rnn_forward@__recurrent_group_0__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "seq_input@__recurrent_group_0__" - input_parameter_name: "_rnn_forward@__recurrent_group_0__.w0" - } - inputs { - input_layer_name: "rnn_forward+delay1@__recurrent_group_0__" - input_parameter_name: "_rnn_forward@__recurrent_group_0__.w1" - } - bias_parameter_name: "_rnn_forward@__recurrent_group_0__.wbias" -} -layers { - name: "rnn_forward" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "rnn_forward" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__recurrent_group_1__" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "seq_input@__recurrent_group_1__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "rnn_back+delay1@__recurrent_group_1__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "rnn_back@__recurrent_group_1__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "seq_input@__recurrent_group_1__" - input_parameter_name: "_rnn_back@__recurrent_group_1__.w0" - } - inputs { - input_layer_name: "rnn_back+delay1@__recurrent_group_1__" - input_parameter_name: "_rnn_back@__recurrent_group_1__.w1" - } - bias_parameter_name: "_rnn_back@__recurrent_group_1__.wbias" -} -layers { - name: "rnn_back" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__first_seq_0__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "rnn_back" - } - select_first: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__recurrent_group_2__" - type: "recurrent_layer_group" - active_type: "" -} -layers 
{ - name: "sub_seq_input@__recurrent_group_2__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "rnn_subseq_forward+delay1@__recurrent_group_2__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "rnn_subseq_forward@__recurrent_group_2__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "sub_seq_input@__recurrent_group_2__" - input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w0" - } - inputs { - input_layer_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" - input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w1" - } - bias_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.wbias" -} -layers { - name: "rnn_subseq_forward" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_1__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "rnn_subseq_forward" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__lstm_group_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__mixed_0__@__lstm_group_0___recurrent_group" - type: "scatter_agent" - size: 400 - active_type: "" -} -layers { - name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - type: "mixed" - size: 400 - active_type: "" - inputs { - input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" - proj_conf { - type: "identity" - name: "___lstm_group_0___input_recurrent.w0" - input_size: 400 - output_size: 400 - } - } - inputs { - input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - input_parameter_name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1" - proj_conf { - type: "fc" - name: "___lstm_group_0___input_recurrent.w1" - input_size: 100 - output_size: 400 - } - } -} -layers { - name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - type: "lstm_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - } - inputs { - input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias" - active_gate_type: "sigmoid" - active_state_type: "tanh" -} -layers { - name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - type: "get_output" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - input_layer_argument: "state" - } -} -layers { - name: "__lstm_group_0__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__last_seq_2__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: "__lstm_group_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__gru_group_0___recurrent_group" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "__mixed_1__@__gru_group_0___recurrent_group" - type: "scatter_agent" - size: 300 - active_type: "" -} -layers { - name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - type: "agent" - size: 100 - active_type: "" -} -layers { - name: "__gru_group_0__@__gru_group_0___recurrent_group" - type: 
"gru_step" - size: 100 - active_type: "tanh" - inputs { - input_layer_name: "__mixed_1__@__gru_group_0___recurrent_group" - input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w0" - } - inputs { - input_layer_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - } - bias_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias" - active_gate_type: "sigmoid" -} -layers { - name: "__gru_group_0__" - type: "gather_agent" - size: 100 - active_type: "" -} -layers { - name: "__last_seq_3__" - type: "seqlastins" - size: 100 - active_type: "" - inputs { - input_layer_name: "__gru_group_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__recurrent_group_3__" - type: "recurrent_layer_group" - active_type: "" -} -layers { - name: "seq_input@__recurrent_group_3__" - type: "scatter_agent" - size: 100 - active_type: "" -} -layers { - name: "__memory_6__@__recurrent_group_3__" - type: "agent" - size: 200 - active_type: "" -} -layers { - name: "__fc_layer_0__@__recurrent_group_3__" - type: "fc" - size: 200 - active_type: "tanh" - inputs { - input_layer_name: "seq_input@__recurrent_group_3__" - input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0" - } - inputs { - input_layer_name: "__memory_6__@__recurrent_group_3__" - input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1" - } - bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias" -} -layers { - name: "__fc_layer_0__" - type: "gather_agent" - size: 200 - active_type: "" -} -layers { - name: "__last_seq_4__" - type: "seqlastins" - size: 200 - active_type: "" - inputs { - input_layer_name: "__fc_layer_0__" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -parameters { - name: "___mixed_0__.w0" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___mixed_1__.w0" - size: 30000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 300 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_forward@__recurrent_group_0__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_forward@__recurrent_group_0__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_forward@__recurrent_group_0__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_rnn_back@__recurrent_group_1__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_back@__recurrent_group_1__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_back@__recurrent_group_1__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "_rnn_subseq_forward@__recurrent_group_2__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_subseq_forward@__recurrent_group_2__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - 
initial_strategy: 0 - initial_smart: true -} -parameters { - name: "_rnn_subseq_forward@__recurrent_group_2__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 400 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias" - size: 300 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___gru_group_0__@__gru_group_0___recurrent_group.w0" - size: 30000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 300 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias" - size: 300 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 300 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___fc_layer_0__@__recurrent_group_3__.w0" - size: 20000 - initial_mean: 0.0 - initial_std: 0.1 - dims: 100 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__@__recurrent_group_3__.w1" - size: 40000 - initial_mean: 0.0 - initial_std: 0.0707106781187 - dims: 200 - dims: 200 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__@__recurrent_group_3__.wbias" - size: 200 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 200 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "seq_input" -input_layer_names: "sub_seq_input" -output_layer_names: "__last_seq_0__" -output_layer_names: "__first_seq_0__" -output_layer_names: "__last_seq_1__" -output_layer_names: "__last_seq_2__" -output_layer_names: "__last_seq_3__" -output_layer_names: "__last_seq_4__" -sub_models { - name: "root" - layer_names: "seq_input" - layer_names: "sub_seq_input" - layer_names: "label" - layer_names: "__mixed_0__" - layer_names: "__mixed_1__" - layer_names: "__recurrent_group_0__" - layer_names: "rnn_forward" - layer_names: "__last_seq_0__" - layer_names: "__recurrent_group_1__" - layer_names: "rnn_back" - layer_names: "__first_seq_0__" - layer_names: "__recurrent_group_2__" - layer_names: "rnn_subseq_forward" - layer_names: "__last_seq_1__" - layer_names: "__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__" - layer_names: "__last_seq_2__" - layer_names: "__gru_group_0___recurrent_group" - layer_names: "__gru_group_0__" - layer_names: "__last_seq_3__" - layer_names: "__recurrent_group_3__" - layer_names: "__fc_layer_0__" - layer_names: "__last_seq_4__" - input_layer_names: "seq_input" - input_layer_names: "sub_seq_input" - output_layer_names: "__last_seq_0__" - output_layer_names: "__first_seq_0__" - output_layer_names: "__last_seq_1__" - output_layer_names: "__last_seq_2__" - output_layer_names: "__last_seq_3__" - output_layer_names: "__last_seq_4__" - is_recurrent_layer_group: false -} -sub_models { - name: "__recurrent_group_0__" - layer_names: "seq_input@__recurrent_group_0__" - layer_names: "rnn_forward+delay1@__recurrent_group_0__" - layer_names: "rnn_forward@__recurrent_group_0__" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "rnn_forward@__recurrent_group_0__" - link_name: "rnn_forward+delay1@__recurrent_group_0__" - } - in_links { - layer_name: "seq_input" - link_name: "seq_input@__recurrent_group_0__" - } - 
out_links { - layer_name: "rnn_forward@__recurrent_group_0__" - link_name: "rnn_forward" - } -} -sub_models { - name: "__recurrent_group_1__" - layer_names: "seq_input@__recurrent_group_1__" - layer_names: "rnn_back+delay1@__recurrent_group_1__" - layer_names: "rnn_back@__recurrent_group_1__" - is_recurrent_layer_group: true - reversed: true - memories { - layer_name: "rnn_back@__recurrent_group_1__" - link_name: "rnn_back+delay1@__recurrent_group_1__" - } - in_links { - layer_name: "seq_input" - link_name: "seq_input@__recurrent_group_1__" - } - out_links { - layer_name: "rnn_back@__recurrent_group_1__" - link_name: "rnn_back" - } -} -sub_models { - name: "__recurrent_group_2__" - layer_names: "sub_seq_input@__recurrent_group_2__" - layer_names: "rnn_subseq_forward+delay1@__recurrent_group_2__" - layer_names: "rnn_subseq_forward@__recurrent_group_2__" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "rnn_subseq_forward@__recurrent_group_2__" - link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" - } - in_links { - layer_name: "sub_seq_input" - link_name: "sub_seq_input@__recurrent_group_2__" - } - out_links { - layer_name: "rnn_subseq_forward@__recurrent_group_2__" - link_name: "rnn_subseq_forward" - } -} -sub_models { - name: "__lstm_group_0___recurrent_group" - layer_names: "__mixed_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" - layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" - } - memories { - layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" - } - in_links { - layer_name: "__mixed_0__" - link_name: "__mixed_0__@__lstm_group_0___recurrent_group" - } - out_links { - layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" - link_name: "__lstm_group_0__" - } -} -sub_models { - name: "__gru_group_0___recurrent_group" - layer_names: "__mixed_1__@__gru_group_0___recurrent_group" - layer_names: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - layer_names: "__gru_group_0__@__gru_group_0___recurrent_group" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" - link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" - } - in_links { - layer_name: "__mixed_1__" - link_name: "__mixed_1__@__gru_group_0___recurrent_group" - } - out_links { - layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" - link_name: "__gru_group_0__" - } -} -sub_models { - name: "__recurrent_group_3__" - layer_names: "seq_input@__recurrent_group_3__" - layer_names: "__memory_6__@__recurrent_group_3__" - layer_names: "__fc_layer_0__@__recurrent_group_3__" - is_recurrent_layer_group: true - reversed: false - memories { - layer_name: "__fc_layer_0__@__recurrent_group_3__" - link_name: "__memory_6__@__recurrent_group_3__" - } - in_links { - layer_name: "seq_input" - link_name: "seq_input@__recurrent_group_3__" - } - out_links { - layer_name: 
"__fc_layer_0__@__recurrent_group_3__" - link_name: "__fc_layer_0__" - } -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr deleted file mode 100644 index 0ec88aa998..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr +++ /dev/null @@ -1,100 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 588 - active_type: "" - height: 14 - width: 14 -} -layers { - name: "rois" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__conv_0__" - type: "exconv" - size: 3136 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___conv_0__.w0" - conv_conf { - filter_size: 3 - channels: 3 - stride: 1 - padding: 1 - groups: 1 - filter_channels: 3 - output_x: 14 - img_size: 14 - caffe_mode: true - filter_size_y: 3 - padding_y: 1 - stride_y: 1 - output_y: 14 - img_size_y: 14 - dilation: 1 - dilation_y: 1 - } - } - bias_parameter_name: "___conv_0__.wbias" - num_filters: 16 - shared_biases: true - height: 14 - width: 14 -} -layers { - name: "__roi_pool_0__" - type: "roi_pool" - size: 784 - active_type: "" - inputs { - input_layer_name: "__conv_0__" - roi_pool_conf { - pooled_width: 7 - pooled_height: 7 - spatial_scale: 0.0625 - } - } - inputs { - input_layer_name: "rois" - } - height: 7 - width: 7 -} -parameters { - name: "___conv_0__.w0" - size: 432 - initial_mean: 0.0 - initial_std: 0.272165526976 - initial_strategy: 0 - initial_smart: false -} -parameters { - name: "___conv_0__.wbias" - size: 16 - initial_mean: 0.0 - initial_std: 0.0 - dims: 16 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -input_layer_names: "rois" -output_layer_names: "__roi_pool_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "rois" - layer_names: "__conv_0__" - layer_names: "__roi_pool_0__" - input_layer_names: "data" - input_layer_names: "rois" - output_layer_names: "__roi_pool_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr deleted file mode 100644 index 19c9f16574..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr +++ /dev/null @@ -1,41 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2560 - active_type: "" -} -layers { - name: "__row_conv_layer_0__" - type: "row_conv" - size: 2560 - active_type: "relu" - inputs { - input_layer_name: "data" - input_parameter_name: "___row_conv_layer_0__.w0" - row_conv_conf { - context_length: 19 - } - } -} -parameters { - name: "___row_conv_layer_0__.w0" - size: 48640 - initial_mean: 0.0 - initial_std: 0.229415733871 - dims: 19 - dims: 2560 - initial_strategy: 0 - initial_smart: true -} -input_layer_names: "data" -output_layer_names: "__row_conv_layer_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__row_conv_layer_0__" - input_layer_names: "data" - output_layer_names: "__row_conv_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr deleted file mode 100644 index c2786ff55c..0000000000 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__row_l2_norm_layer_0__" - type: "row_l2_norm" - size: 300 - active_type: "" - inputs { - input_layer_name: "input" - } -} -input_layer_names: "input" -output_layer_names: "__row_l2_norm_layer_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "__row_l2_norm_layer_0__" - input_layer_names: "input" - output_layer_names: "__row_l2_norm_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr deleted file mode 100644 index 35ade126a2..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr +++ /dev/null @@ -1,72 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__scale_shift_0__" - type: "scale_shift" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___scale_shift_0__.w0" - } -} -layers { - name: "__scale_shift_1__" - type: "scale_shift" - size: 100 - active_type: "" - inputs { - input_layer_name: "data" - input_parameter_name: "___scale_shift_1__.w0" - } - bias_parameter_name: "___scale_shift_1__.wbias" -} -parameters { - name: "___scale_shift_0__.w0" - size: 1 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___scale_shift_1__.w0" - size: 1 - initial_mean: 0.0 - initial_std: 1.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___scale_shift_1__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false -} -input_layer_names: "data" -output_layer_names: "__scale_shift_0__" -output_layer_names: "__scale_shift_1__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__scale_shift_0__" - layer_names: "__scale_shift_1__" - input_layer_names: "data" - output_layer_names: "__scale_shift_0__" - output_layer_names: "__scale_shift_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_sub_region_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_sub_region_layer.protostr deleted file mode 100644 index d20133a10e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_sub_region_layer.protostr +++ /dev/null @@ -1,51 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 2016 - active_type: "" - height: 48 - width: 42 -} -layers { - name: "indices" - type: "data" - size: 6 - active_type: "" -} -layers { - name: "__scale_sub_region_0__" - type: "scale_sub_region" - size: 2016 - active_type: "" - inputs { - input_layer_name: "data" - scale_sub_region_conf { - image_conf { - channels: 1 - img_size: 42 - img_size_y: 48 - } - value: 0.0 - } - } - inputs { - input_layer_name: "indices" - } - height: 48 - width: 42 -} -input_layer_names: "data" -input_layer_names: "indices" -output_layer_names: "__scale_sub_region_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "indices" - layer_names: "__scale_sub_region_0__" - input_layer_names: "data" - 
input_layer_names: "indices" - output_layer_names: "__scale_sub_region_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr deleted file mode 100644 index 9d1b41c9d5..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr +++ /dev/null @@ -1,51 +0,0 @@ -type: "nn" -layers { - name: "data1" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "data2" - type: "data" - size: 30 - active_type: "" -} -layers { - name: "__seqconcat_0__" - type: "seqconcat" - size: 30 - active_type: "" - inputs { - input_layer_name: "data1" - } - inputs { - input_layer_name: "data2" - } -} -layers { - name: "__seqreshape_0__" - type: "seqreshape" - size: 5 - active_type: "" - inputs { - input_layer_name: "data1" - } -} -input_layer_names: "data1" -input_layer_names: "data2" -output_layer_names: "__seqconcat_0__" -output_layer_names: "__seqreshape_0__" -sub_models { - name: "root" - layer_names: "data1" - layer_names: "data2" - layer_names: "__seqconcat_0__" - layer_names: "__seqreshape_0__" - input_layer_names: "data1" - input_layer_names: "data2" - output_layer_names: "__seqconcat_0__" - output_layer_names: "__seqreshape_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr deleted file mode 100644 index 5b73d614fe..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr +++ /dev/null @@ -1,79 +0,0 @@ -type: "nn" -layers { - name: "word" - type: "data" - size: 128 - active_type: "" -} -layers { - name: "starts" - type: "data" - size: 5 - active_type: "" -} -layers { - name: "ends" - type: "data" - size: 5 - active_type: "" -} -layers { - name: "__seq_slice_layer_0__" - type: "seq_slice" - size: 128 - active_type: "" - inputs { - input_layer_name: "word" - } - inputs { - input_layer_name: "starts" - } - inputs { - input_layer_name: "ends" - } -} -layers { - name: "__seq_slice_layer_1__" - type: "seq_slice" - size: 128 - active_type: "" - inputs { - input_layer_name: "word" - } - inputs { - input_layer_name: "starts" - } - select_first: true -} -layers { - name: "__seq_slice_layer_2__" - type: "seq_slice" - size: 128 - active_type: "" - inputs { - input_layer_name: "word" - } - inputs { - input_layer_name: "ends" - } - select_first: false -} -input_layer_names: "word" -output_layer_names: "__seq_slice_layer_0__" -output_layer_names: "__seq_slice_layer_1__" -output_layer_names: "__seq_slice_layer_2__" -sub_models { - name: "root" - layer_names: "word" - layer_names: "starts" - layer_names: "ends" - layer_names: "__seq_slice_layer_0__" - layer_names: "__seq_slice_layer_1__" - layer_names: "__seq_slice_layer_2__" - input_layer_names: "word" - output_layer_names: "__seq_slice_layer_0__" - output_layer_names: "__seq_slice_layer_1__" - output_layer_names: "__seq_slice_layer_2__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr deleted file mode 100644 index 8989561df0..0000000000 --- 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr +++ /dev/null @@ -1,162 +0,0 @@ -type: "nn" -layers { - name: "dat_in" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__seq_pooling_0__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_1__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_2__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "average" - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_3__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "average" - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_4__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "sum" - trans_type: "seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_5__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "sum" - trans_type: "non-seq" - seq_pool_stride: -1 -} -layers { - name: "__seq_pooling_6__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__seq_pooling_7__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "average" - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__seq_pooling_8__" - type: "average" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - average_strategy: "sum" - trans_type: "non-seq" - seq_pool_stride: 5 -} -layers { - name: "__seq_pooling_9__" - type: "max" - size: 100 - active_type: "" - inputs { - input_layer_name: "dat_in" - } - output_max_index: true - trans_type: "non-seq" - seq_pool_stride: -1 -} -input_layer_names: "dat_in" -output_layer_names: "__seq_pooling_0__" -output_layer_names: "__seq_pooling_1__" -output_layer_names: "__seq_pooling_2__" -output_layer_names: "__seq_pooling_3__" -output_layer_names: "__seq_pooling_4__" -output_layer_names: "__seq_pooling_5__" -output_layer_names: "__seq_pooling_6__" -output_layer_names: "__seq_pooling_7__" -output_layer_names: "__seq_pooling_8__" -output_layer_names: "__seq_pooling_9__" -sub_models { - name: "root" - layer_names: "dat_in" - layer_names: "__seq_pooling_0__" - layer_names: "__seq_pooling_1__" - layer_names: "__seq_pooling_2__" - layer_names: "__seq_pooling_3__" - layer_names: "__seq_pooling_4__" - layer_names: "__seq_pooling_5__" - layer_names: "__seq_pooling_6__" - layer_names: "__seq_pooling_7__" - layer_names: "__seq_pooling_8__" - layer_names: "__seq_pooling_9__" - input_layer_names: "dat_in" - output_layer_names: "__seq_pooling_0__" - output_layer_names: "__seq_pooling_1__" - output_layer_names: "__seq_pooling_2__" - output_layer_names: "__seq_pooling_3__" - output_layer_names: "__seq_pooling_4__" - output_layer_names: "__seq_pooling_5__" - output_layer_names: "__seq_pooling_6__" - output_layer_names: "__seq_pooling_7__" - output_layer_names: "__seq_pooling_8__" - output_layer_names: "__seq_pooling_9__" - is_recurrent_layer_group: false -} - diff --git 
a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr deleted file mode 100644 index 4aa041ea2e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_smooth_l1.protostr +++ /dev/null @@ -1,40 +0,0 @@ -type: "nn" -layers { - name: "input" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "label" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "__smooth_l1_cost_0__" - type: "smooth_l1" - size: 1 - active_type: "" - inputs { - input_layer_name: "input" - } - inputs { - input_layer_name: "label" - } - coeff: 1.0 -} -input_layer_names: "input" -input_layer_names: "label" -output_layer_names: "__smooth_l1_cost_0__" -sub_models { - name: "root" - layer_names: "input" - layer_names: "label" - layer_names: "__smooth_l1_cost_0__" - input_layer_names: "input" - input_layer_names: "label" - output_layer_names: "__smooth_l1_cost_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr deleted file mode 100644 index 569b0b945a..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr +++ /dev/null @@ -1,72 +0,0 @@ -model_config { - type: "nn" - layers { - name: "a" - type: "data" - size: 10 - active_type: "" - } - input_layer_names: "a" - output_layer_names: "a" - sub_models { - name: "root" - layer_names: "a" - input_layer_names: "a" - output_layer_names: "a" - is_recurrent_layer_group: false - } -} -data_config { - type: "py2" - files: "train.list" - async_load_data: false - for_test: false - load_data_module: "a" - load_data_object: "c" - load_data_args: "" - data_ratio: 1 - is_main_data: true - usage_ratio: 1.0 -} -opt_config { - batch_size: 1000 - algorithm: "sgd" - learning_rate: 0.001 - learning_rate_decay_a: 0.0 - learning_rate_decay_b: 0.0 - l1weight: 0.1 - l2weight: 0.0 - c1: 0.0001 - backoff: 0.5 - owlqn_steps: 10 - max_backoff: 5 - l2weight_zero_iter: 0 - average_window: 0 - learning_method: "momentum" - ada_epsilon: 1e-06 - do_average_in_cpu: false - ada_rou: 0.95 - learning_rate_schedule: "poly" - delta_add_rate: 1.0 - shrink_parameter_value: 0 - adam_beta1: 0.9 - adam_beta2: 0.999 - adam_epsilon: 1e-08 - learning_rate_args: "" - async_lagged_grad_discard_ratio: 1.5 -} -test_data_config { - type: "py2" - files: "test.list" - async_load_data: false - for_test: true - load_data_module: "b" - load_data_object: "d" - load_data_args: "" - data_ratio: 1 - is_main_data: true - usage_ratio: 1.0 -} -save_dir: "./output/model" -start_pass: 0 - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr deleted file mode 100644 index ca1b2d8cff..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr +++ /dev/null @@ -1,40 +0,0 @@ -type: "nn" -layers { - name: "data" - type: "data" - size: 3200 - active_type: "" - height: 20 - width: 10 -} -layers { - name: "__spp_0__" - type: "spp" - size: 80 - active_type: "" - inputs { - input_layer_name: "data" - spp_conf { - image_conf { - channels: 16 - img_size: 10 - img_size_y: 20 - } - pool_type: "max-projection" - pyramid_height: 2 - } - } - height: 1 - width: 5 -} -input_layer_names: 
"data" -output_layer_names: "__spp_0__" -sub_models { - name: "root" - layer_names: "data" - layer_names: "__spp_0__" - input_layer_names: "data" - output_layer_names: "__spp_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sub_nested_seq_select_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sub_nested_seq_select_layer.protostr deleted file mode 100644 index 4b906b113e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sub_nested_seq_select_layer.protostr +++ /dev/null @@ -1,37 +0,0 @@ -type: "nn" -layers { - name: "input_seq" - type: "data" - size: 300 - active_type: "" -} -layers { - name: "input" - type: "data" - size: 5 - active_type: "" -} -layers { - name: "__sub_nested_seq_layer_0__" - type: "sub_nested_seq" - size: 300 - active_type: "" - inputs { - input_layer_name: "input_seq" - } - inputs { - input_layer_name: "input" - } -} -input_layer_names: "input_seq" -output_layer_names: "__sub_nested_seq_layer_0__" -sub_models { - name: "root" - layer_names: "input_seq" - layer_names: "input" - layer_names: "__sub_nested_seq_layer_0__" - input_layer_names: "input_seq" - output_layer_names: "__sub_nested_seq_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/unused_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/unused_layers.protostr deleted file mode 100644 index 89ed28406e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/unused_layers.protostr +++ /dev/null @@ -1,27 +0,0 @@ -type: "nn" -layers { - name: "probs" - type: "data" - size: 100 - active_type: "" -} -layers { - name: "__sampling_id_layer_0__" - type: "sampling_id" - size: 100 - active_type: "" - inputs { - input_layer_name: "probs" - } -} -input_layer_names: "probs" -output_layer_names: "__sampling_id_layer_0__" -sub_models { - name: "root" - layer_names: "probs" - layer_names: "__sampling_id_layer_0__" - input_layer_names: "probs" - output_layer_names: "__sampling_id_layer_0__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr deleted file mode 100644 index 7a2f3eab38..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr +++ /dev/null @@ -1,87 +0,0 @@ -type: "nn" -layers { - name: "a" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "b" - type: "data" - size: 10 - active_type: "" -} -layers { - name: "__addto_0__" - type: "addto" - size: 10 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__concat_0__" - type: "concat" - size: 20 - active_type: "" - inputs { - input_layer_name: "a" - } - inputs { - input_layer_name: "b" - } - height: 0 - width: 0 - depth: 1 -} -layers { - name: "__concat_1__" - type: "concat2" - size: 20 - active_type: "" - inputs { - input_layer_name: "a" - proj_conf { - type: "identity" - name: "___concat_1__.w0" - input_size: 10 - output_size: 10 - } - } - inputs { - input_layer_name: "b" - proj_conf { - type: "identity" - name: "___concat_1__.w1" - input_size: 10 - output_size: 10 - } - } -} -input_layer_names: "a" -input_layer_names: "b" -output_layer_names: "__addto_0__" -output_layer_names: "__concat_0__" 
-output_layer_names: "__concat_1__" -sub_models { - name: "root" - layer_names: "a" - layer_names: "b" - layer_names: "__addto_0__" - layer_names: "__concat_0__" - layer_names: "__concat_1__" - input_layer_names: "a" - input_layer_names: "b" - output_layer_names: "__addto_0__" - output_layer_names: "__concat_0__" - output_layer_names: "__concat_1__" - is_recurrent_layer_group: false -} - diff --git a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh b/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh deleted file mode 100755 index c8a3b190b1..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -cd `dirname $0` - -set -e -PYTHON_EXEC=$1 -COMPARE_PROTO_UTIL=$2 - -protostr=`dirname $0`/protostr - -files=`ls $protostr | grep -v "unittest"` - -./generate_protostr.sh ${PYTHON_EXEC} - -. ./file_list.sh - -if [ -z ${COMPARE_PROTO_UTIL} ]; then - for file in $files - do - base_protostr=$protostr/$file - new_protostr=$protostr/$file.unittest - diff $base_protostr $new_protostr -u - diff $protostr/$file $protostr/$file.non_file_config.unittest -u - done -else - for file in ${configs[*]} - do - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest; then - diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u - fi - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then - diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u - fi - done - - for file in ${whole_configs[*]} - do - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then - diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u - fi - if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then - diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u - fi - done -fi diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py deleted file mode 100644 index 3229252a2f..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -a = data_layer(name='feature_a', size=200) -b = data_layer(name='feature_b', size=200) - -fc_param = ParamAttr(name='fc_param', initial_max=1.0, initial_min=-1.0) -bias_param = ParamAttr(name='bias_param', initial_mean=0.0, initial_std=0.0) - -softmax_param = ParamAttr( - name='softmax_param', initial_max=1.0, initial_min=-1.0) - -hidden_a = fc_layer( - input=a, size=200, param_attr=fc_param, bias_attr=bias_param) -hidden_b = fc_layer( - input=b, size=200, param_attr=fc_param, bias_attr=bias_param) - -predict = fc_layer( - input=[hidden_a, hidden_b], - param_attr=[softmax_param, softmax_param], - bias_attr=False, - size=10, - act=SoftmaxActivation()) - -outputs( - classification_cost( - input=predict, label=data_layer( - name='label', size=10))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py deleted file mode 100644 index dff561fdf7..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -data_1 = data_layer(name='data_a', size=100) -data_2 = data_layer(name='data_b', size=100) - -mixed_param = ParamAttr(name='mixed_param') - -gru_param = ParamAttr(name='gru_param') -gru_bias = ParamAttr(name='gru_bias', initial_mean=0., initial_std=0.) - -gru1 = simple_gru( - input=data_1, - size=200, - mixed_param_attr=mixed_param, - mixed_bias_param_attr=False, - gru_bias_attr=gru_bias, - gru_param_attr=gru_param) - -gru2 = simple_gru( - input=data_2, - size=200, - mixed_param_attr=mixed_param, - mixed_bias_param_attr=False, - gru_bias_attr=gru_bias, - gru_param_attr=gru_param) - -softmax_param = ParamAttr(name='softmax_param') - -predict = fc_layer( - input=[last_seq(input=gru1), last_seq(input=gru2)], - size=10, - param_attr=[softmax_param, softmax_param], - bias_attr=False, - act=SoftmaxActivation()) -outputs( - classification_cost( - input=predict, label=data_layer( - name='label', size=10))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py deleted file mode 100644 index 97ef2d07ae..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -data_1 = data_layer(name='data_a', size=100) -data_2 = data_layer(name='data_b', size=100) - -mixed_param = ParamAttr(name='mixed_param') - -with mixed_layer(size=400, bias_attr=False) as m1: - m1 += full_matrix_projection(input=data_1, param_attr=mixed_param) - -with mixed_layer(size=400, bias_attr=False) as m2: - m2 += full_matrix_projection(input=data_2, param_attr=mixed_param) - -lstm_param = ParamAttr(name='lstm_param') -lstm_bias = ParamAttr(name='lstm_bias', initial_mean=0., initial_std=0.) - -lstm1 = lstmemory_group( - input=m1, - param_attr=lstm_param, - lstm_bias_attr=lstm_bias, - input_proj_bias_attr=False) - -lstm2 = lstmemory_group( - input=m2, - param_attr=lstm_param, - lstm_bias_attr=lstm_bias, - input_proj_bias_attr=False) - -softmax_param = ParamAttr(name='softmax_param') - -predict = fc_layer( - input=[last_seq(input=lstm1), last_seq(input=lstm2)], - size=10, - param_attr=[softmax_param, softmax_param], - bias_attr=False, - act=SoftmaxActivation()) -outputs( - classification_cost( - input=predict, label=data_layer( - name='label', size=10))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py deleted file mode 100644 index f882efcba2..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -din = data_layer(name='data', size=200) - -hidden = fc_layer(input=din, size=200, act=SigmoidActivation()) - -rnn = recurrent_layer(input=hidden, act=SigmoidActivation()) - -rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True) - -lstm1_param = fc_layer( - input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False) - -lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation()) - -lstm2_param = fc_layer( - input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False) - -lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True) - -gru1_param = fc_layer( - input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False) -gru1 = grumemory(input=gru1_param, act=SigmoidActivation()) - -gru2_param = fc_layer( - input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False) -gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True) - -outputs( - last_seq(input=rnn), - first_seq(input=rnn2), - last_seq(input=lstm1), - first_seq(input=lstm2), - last_seq(input=gru1), - first_seq(input=gru2)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py deleted file mode 100644 index 169038deb1..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -#data = data_layer(name='data', size=180, width=30, height=6) -#batchNorm = batch_norm_layer(data, num_channels=1) -#outputs(batchNorm) - -data3D = data_layer(name='data3D', size=120 * 3, width=20, height=6, depth=3) -batchNorm3D = batch_norm_layer(data3D, num_channels=1, img3D=True) -outputs(batchNorm3D) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py b/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py deleted file mode 100644 index d29e4e5c4d..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -din = data_layer(name='data', size=120) - -outputs(bidirectional_gru(input=din, size=40, return_seq=True)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py deleted file mode 100644 index 5e724ba7d1..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2304) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=1, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -bilinear = bilinear_interp_layer(input=conv, out_size_x=64, out_size_y=64) - -pool = img_pool_layer( - input=bilinear, - num_channels=16, - pool_size=2, - stride=2, - pool_type=MaxPooling()) - -fc = fc_layer(input=pool, size=384, bias_attr=False) - -outputs(fc) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py deleted file mode 100644 index 95a1192bfa..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -clip = clip_layer(input=data, min=-10, max=10) - -outputs(clip) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py b/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py deleted file mode 100644 index 9b791a0222..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import re -import getopt - - -def main(print_whole_config, globals, locals): - ''' - run the test config read from stdin through parse_config and print the parsed proto - ''' - cmdstr = """from paddle.trainer.config_parser import parse_config\n""" - importstr = "" - functionstr = "" - - for line in sys.stdin: - if re.match("^import", line) or re.match("^from.*import", line): - importstr = importstr + line - else: - functionstr = functionstr + " " + line - - cmdstr = cmdstr + importstr + """def configs():\n""" + functionstr - #cmdstr = cmdstr + """def configs():\n""" + importstr + functionstr - if print_whole_config: - cmdstr = cmdstr + """print parse_config(configs, "")""" - else: - cmdstr = cmdstr + """print parse_config(configs, "").model_config""" - - exec(cmdstr, globals, locals) - - -if __name__ == '__main__': - whole = False - opts, args = getopt.getopt(sys.argv[1:], "", ["whole"]) - for op, value in opts: - if op == "--whole": - whole = True - main(whole, globals(), locals()) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py deleted file mode 100644 index f9966e399e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -num_channels = 3 -filter_size = 3 -filter_size_y = 3 -filter_size_z = 3 -stride = 2 -stride_y = 2 -stride_z = 2 -padding = 1 -padding_y = 1 -padding_z = 1 -groups = 1 - -data = data_layer( - name='data', size=12096 * num_channels, height=48, width=42, depth=6) -# first -conv3d_1 = img_conv3d_layer( - input=data, - name='conv3d_1', - num_filters=16, - num_channels=num_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="conv3d", - act=LinearActivation()) -# second -conv3d_2 = img_conv3d_layer( - input=data, - name='conv3d_2', - num_filters=16, - num_channels=num_channels, - filter_size=[filter_size, filter_size_y, filter_size_z], - stride=[stride, stride_y, stride_z], - padding=[padding, padding_y, padding_z], - groups=groups, - bias_attr=True, - shared_biases=True, - trans=False, - layer_type="conv3d", - act=LinearActivation()) -outputs(conv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py deleted file mode 100644 index 351694fd55..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -seq_in = data_layer(name='input', size=200) -labels = data_layer(name='labels', size=5000) - -probs = data_layer(name='probs', size=10) -xe_label = data_layer(name='xe-label', size=10) - -hidden = fc_layer(input=seq_in, size=4) -outputs( - ctc_layer( - input=seq_in, label=labels), - warp_ctc_layer( - input=seq_in, label=labels, blank=0), - crf_layer( - input=hidden, label=data_layer( - name='crf_label', size=4)), - rank_cost( - left=data_layer( - name='left', size=1), - right=data_layer( - name='right', size=1), - label=data_layer( - name='label', size=1)), - lambda_cost( - input=data_layer( - name='list_feature', size=100), - score=data_layer( - name='list_scores', size=1)), - cross_entropy( - input=probs, label=xe_label), - cross_entropy_with_selfnorm( - input=probs, label=xe_label), - huber_regression_cost( - input=seq_in, label=labels), - huber_classification_cost( - input=data_layer( - name='huber_probs', size=1), - label=data_layer( - name='huber_label', size=1)), - multi_binary_label_cross_entropy( - input=probs, label=xe_label), - sum_cost(input=hidden), - nce_layer( - input=hidden, label=labels)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py deleted file mode 100644 index 8cbcf5de0a..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -data = data_layer(name='input', size=300) -lbl = data_layer(name='label', size=1) -wt = data_layer(name='weight', size=1) -fc = fc_layer(input=data, size=10, act=SoftmaxActivation()) - -outputs( - classification_cost( - input=fc, label=lbl, weight=wt), - square_error_cost( - input=fc, label=lbl, weight=wt), - nce_layer( - input=fc, - label=data_layer( - name='multi_class_label', size=500), - weight=wt)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py deleted file mode 100644 index b4ffff252b..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2016, height=48, width=42) -reference_data = data_layer(name='reference', size=768, height=16, width=16) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=1, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -pool = img_pool_layer(input=conv, pool_size=2, stride=2, pool_type=MaxPooling()) - -crop = crop_layer(input=[pool, reference_data], axis=2) - -outputs(crop) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py deleted file mode 100644 index 4a5bdf1181..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -#coding=utf-8 - -from paddle.trainer_config_helpers import * -beam_size = 5 - -# the first beam expansion. -sentence_states = data_layer(name="sentence_states", size=32) -sentence_scores = data_layer(name="sentence_scores", size=1) -topk_sentence_ids = kmax_seq_score_layer( - input=sentence_scores, beam_size=beam_size) - -# the second beam expansion. -topk_sen = sub_nested_seq_layer( - input=sentence_states, selected_indices=topk_sentence_ids) -start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation()) -topk_start_pos_ids = kmax_seq_score_layer( - input=start_pos_scores, beam_size=beam_size) - -# the final beam expansion. -topk_start_spans = seq_slice_layer( - input=topk_sen, starts=topk_start_pos_ids, ends=None) -end_pos_scores = fc_layer( - input=topk_start_spans, size=1, act=LinearActivation()) -topk_end_pos_ids = kmax_seq_score_layer( - input=end_pos_scores, beam_size=beam_size) - -# define the cost -sentence_idx = data_layer(name="sentences_ids", size=1) -start_idx = data_layer(name="start_ids", size=1) -end_idx = data_layer(name="end_ids", size=1) -cost = cross_entropy_over_beam(input=[ - BeamInput( - candidate_scores=sentence_scores, - selected_candidates=topk_sentence_ids, - gold=sentence_idx), BeamInput( - candidate_scores=start_pos_scores, - selected_candidates=topk_start_pos_ids, - gold=start_idx), BeamInput( - candidate_scores=end_pos_scores, - selected_candidates=topk_end_pos_ids, - gold=end_idx) -]) - -outputs(cost) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py deleted file mode 100644 index 08e701c7a8..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -num_channels = 3 -filter_size = 3 -filter_size_y = 3 -filter_size_z = 3 -stride = 2 -stride_y = 2 -stride_z = 2 -padding = 1 -padding_y = 1 -padding_z = 1 -groups = 1 - -data = data_layer( - name='data', size=12096 * num_channels, height=48, width=42, depth=6) - -# first -deconv3d_1 = img_conv3d_layer( - input=data, - name='deconv3d_1', - num_filters=16, - num_channels=num_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=True, - shared_biases=True, - trans=True, - layer_type="deconv3d", - act=LinearActivation()) -# second -deconv3d_2 = img_conv3d_layer( - input=data, - name='deconv3d_2', - num_filters=16, - num_channels=num_channels, - filter_size=[filter_size, filter_size_y, filter_size_z], - stride=[stride, stride_y, stride_z], - padding=[padding, padding_y, padding_z], - groups=groups, - bias_attr=True, - shared_biases=True, - trans=True, - layer_type="deconv3d", - act=LinearActivation()) -outputs(deconv3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py deleted file mode 100644 index 4ecd1c2b7e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -input_loc = data_layer(name='input_loc', size=16, height=16, width=1) - -input_conf = data_layer(name='input_conf', size=8, height=1, width=8) - -priorbox = data_layer(name='priorbox', size=32, height=4, width=8) - -detout = detection_output_layer( - input_loc=input_loc, - input_conf=input_conf, - priorbox=priorbox, - num_classes=21, - nms_threshold=0.45, - nms_top_k=400, - keep_top_k=200, - confidence_threshold=0.01, - background_id=0, - name='test_detection_output') - -outputs(detout) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py deleted file mode 100644 index 9b444bc2c0..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -vec1 = data_layer(name='vector1', size=10) -vec2 = data_layer(name='vector2', size=10) -dot_product = dot_prod_layer(input1=vec1, input2=vec2) - -outputs(dot_product) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py deleted file mode 100644 index 85101d2b92..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=30) -data_seq = data_layer(name='data_seq', size=30) - -outputs( - expand_layer( - input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_SEQUENCE), - expand_layer( - input=din, - expand_as=data_seq, - expand_level=ExpandLevel.FROM_NO_SEQUENCE)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py deleted file mode 100644 index 48ac46c5bb..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='data', size=1024) - -fm = factorization_machine(input=data, factor_size=10) - -outputs(fm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py b/python/paddle/trainer_config_helpers/tests/configs/test_fc.py deleted file mode 100644 index f1e454d211..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=100) - -trans = trans_layer(input=din) - -hidden = fc_layer(input=trans, size=100, bias_attr=False) - -mask = data_layer(name='mask', size=100) - -hidden_sel = selective_fc_layer( - input=din, select=mask, size=100, act=SigmoidActivation()) - -outputs(hidden, hidden_sel) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py deleted file mode 100644 index afc3e9207c..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=256) -glu = gated_unit_layer( - size=512, - input=data, - act=TanhActivation(), - gate_attr=ExtraLayerAttribute(error_clipping_threshold=100.0), - gate_param_attr=ParamAttr(initial_std=1e-4), - gate_bias_attr=ParamAttr(initial_std=1), - inproj_attr=ExtraLayerAttribute(error_clipping_threshold=100.0), - inproj_param_attr=ParamAttr(initial_std=1e-4), - inproj_bias_attr=ParamAttr(initial_std=1), - layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0)) - -outputs(glu) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py deleted file mode 100644 index ac9902d08c..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-4) - -din = data_layer(name='data', size=120) - -outputs( - grumemory( - input=din, - size=40, - reverse=True, - gate_act=TanhActivation(), - act=SigmoidActivation())) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py deleted file mode 100644 index da781c149b..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -din = data_layer(name='data', size=100) -label = data_layer(name='label', size=10) - -outputs(hsigmoid(input=din, label=label, num_classes=10)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py deleted file mode 100644 index 171da10f75..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env python -#coding=utf-8 -from paddle.trainer_config_helpers import * - -data = data_layer(name="input_seq", size=128) -scores = fc_layer(input=data, size=1, act=ExpActivation()) -kmax_seq_id = kmax_seq_score_layer(input=scores, beam_size=5) - -outputs(kmax_seq_id) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py deleted file mode 100644 index 42c9b5deea..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -outputs( - l2_distance_layer( - x=data_layer( - name='x', size=128), y=data_layer( - name='y', size=128))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py deleted file mode 100644 index 26eeea5461..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=128) - -outputs( - lstmemory( - input=din, - reverse=True, - gate_act=TanhActivation(), - act=TanhActivation(), - size=32)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py deleted file mode 100644 index 2cd41a306a..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2304, height=48, width=48) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=1, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -maxout = maxout_layer(input=conv, num_channels=16, groups=2) - -pool = img_pool_layer( - input=maxout, num_channels=8, pool_size=2, stride=2, pool_type=MaxPooling()) - -conv2 = img_conv_layer( - input=pool, - filter_size=3, - num_channels=8, - num_filters=128, - padding=1, - act=LinearActivation(), - bias_attr=True) - -maxout2 = maxout_layer(input=conv2, num_channels=128, groups=4) - -block = block_expand_layer( - input=maxout2, - num_channels=32, - stride_x=1, - stride_y=1, - block_x=1, - block_y=6) - -fc = fc_layer(input=block, size=384, bias_attr=False) - -outputs(fc) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py deleted file mode 100644 index b4fd9052c4..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -input_loc = data_layer(name='input_loc', size=16, height=16, width=1) - -input_conf = data_layer(name='input_conf', size=8, height=1, width=8) - -priorbox = data_layer(name='priorbox', size=32, height=4, width=8) - -label = data_layer(name='label', size=24, height=4, width=6) - -multibox_loss = multibox_loss_layer( - input_loc=input_loc, - input_conf=input_conf, - priorbox=priorbox, - label=label, - num_classes=21, - overlap_threshold=0.5, - neg_pos_ratio=3.0, - neg_overlap=0.5, - background_id=0, - name='test_multibox_loss') - -outputs(multibox_loss) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py deleted file mode 100644 index bfba07be86..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -index = data_layer(name='index', size=1) -din1 = data_layer(name='data1', size=30) -din2 = data_layer(name='data2', size=30) -din3 = data_layer(name='data3', size=30) - -dout = multiplex_layer([index, din1, din2, din3]) - -outputs(dout) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py deleted file mode 100644 index 891894172c..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -weight = data_layer(name='w', size=1) -a = data_layer(name='a', size=100) -b = data_layer(name='b', size=100) -c = data_layer(name='c', size=200) -d = data_layer(name='d', size=31) - -outputs( - interpolation_layer( - input=[a, b], weight=weight), - power_layer( - input=a, weight=weight), - scaling_layer( - input=a, weight=weight), - cos_sim( - a=a, b=b), - cos_sim( - a=a, b=c, size=2), - sum_to_one_norm_layer(input=a), - conv_shift_layer( - a=a, b=d), - tensor_layer( - a=a, b=b, size=1000), - slope_intercept_layer( - input=a, slope=0.7, intercept=0.9), - linear_comb_layer( - weights=b, vectors=c)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py deleted file mode 100644 index c5825c82e5..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2016, height=48, width=42) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=1, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -pool = img_pool_layer(input=conv, pool_size=2, stride=2, pool_type=MaxPooling()) - -pad = pad_layer(input=pool, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) - -outputs(pad) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py deleted file mode 100644 index 5ff52c195a..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=100, learning_rate=1e-5) - -data_2d = data_layer(name='data_2d', size=6000, height=20, width=10) - -pool_2d = img_pool_layer( - name="pool___2d", - input=data_2d, - num_channels=30, - pool_size=5, - stride=3, - padding=1, - pool_type=AvgPooling()) -outputs(pool_2d) - -data_3d = data_layer( - name='data_3d_1', size=60000, depth=10, height=20, width=10) - -pool_3d_1 = img_pool3d_layer( - name="pool_3d_1", - input=data_3d, - num_channels=30, - pool_size=5, - stride=3, - padding=1, - pool_type=AvgPooling()) -outputs(pool_3d_1) - -pool_3d_2 = img_pool3d_layer( - name="pool_3d_2", - input=data_3d, - num_channels=30, - pool_size=[5, 5, 5], - stride=[3, 3, 3], - padding=[1, 1, 1], - pool_type=MaxPooling()) -outputs(pool_3d_2) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py deleted file mode 100644 index d803a0d13d..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300, height=10, width=10) -prelu = prelu_layer(input=data, num_channels=3) -prelu = prelu_layer(input=data, partial_sum=1, num_channels=3) -prelu = prelu_layer(input=data, partial_sum=5, num_channels=3) -prelu = prelu_layer(input=data, channel_shared=True, num_channels=3) -prelu = prelu_layer(input=data, channel_shared=False, num_channels=3) - -outputs(prelu) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py deleted file mode 100644 index ca1f5a4572..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -din = data_layer(name='input', size=100) - -print_layer(input=din) - -outputs(din) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py b/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py deleted file mode 100644 index d44870d804..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=100) - -enc = din -for i in range(32): - enc = addto_layer([enc, enc]) - -pred = fc_layer( - input=fc_layer( - input=enc, size=32, act=ReluActivation()), - size=10, - act=SoftmaxActivation()) -outputs(pred) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py deleted file mode 100644 index ee90e830df..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din = data_layer(name='data', size=30) - -outputs( - repeat_layer( - input=din, num_repeats=10, as_row_vector=True), - repeat_layer( - input=din, num_repeats=10, act=TanhActivation(), as_row_vector=False)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py deleted file mode 100644 index 4aa81919df..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -resized = resize_layer(input=data, size=150) - -outputs(resized) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py deleted file mode 100644 index 3824ef5995..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -seq = data_layer(name='seq_input', size=100) -sub_seq = data_layer(name='sub_seq_input', size=100) -lbl = data_layer(name='label', size=1) - - -def generate_rnn_simple(name): - def rnn_simple(s): - m = memory(name=name, size=200) - fc = fc_layer(input=[s, m], size=200, name=name) - return fc - - return rnn_simple - - -def generate_rnn_simple_no_name(): - def rnn_simple(s): - m = memory(name=None, size=200) - fc = fc_layer(input=[s, m], size=200) - m.set_input(fc) - return fc - - return rnn_simple - - -with mixed_layer() as lstm_param: # test lstm unit, rnn group - lstm_param += full_matrix_projection(input=seq, size=100 * 4) - -with mixed_layer() as gru_param: - gru_param += full_matrix_projection(input=seq, size=100 * 3) - -outputs( - last_seq(input=recurrent_group( - step=generate_rnn_simple('rnn_forward'), input=seq)), - first_seq(input=recurrent_group( - step=generate_rnn_simple('rnn_back'), input=seq, reverse=True)), - last_seq(input=recurrent_group( - step=generate_rnn_simple('rnn_subseq_forward'), - input=SubsequenceInput(input=sub_seq))), - last_seq(input=lstmemory_group( - input=lstm_param, size=100)), - last_seq(input=gru_group( - input=gru_param, size=100)), - last_seq(input=recurrent_group( - step=generate_rnn_simple_no_name(), input=seq)), ) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py deleted file mode 100644 index 6929d106c6..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
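As a plain-Python sketch of what the memory/recurrent_group pairing in test_rnn_group.py above expresses (the weights, the absence of bias, and the tanh activation are assumptions for illustration):

    import numpy as np

    def rnn_simple_sketch(seq, W_in, W_mem):
        m = np.zeros(W_mem.shape[0])        # boot memory: all zeros
        outs = []
        for x_t in seq:                     # one step per sequence element
            # fc_layer(input=[s, m]): mix current input with previous output
            m = np.tanh(x_t @ W_in + m @ W_mem)
            outs.append(m)
        return np.stack(outs)               # last_seq(...) picks outs[-1]

    seq = np.random.randn(7, 100)           # 7 steps of a size-100 sequence
    out = rnn_simple_sketch(seq,
                            0.01 * np.random.randn(100, 200),
                            0.01 * np.random.randn(200, 200))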
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) - -rois = data_layer(name='rois', size=10) - -conv = img_conv_layer( - input=data, - filter_size=3, - num_channels=3, - num_filters=16, - padding=1, - act=LinearActivation(), - bias_attr=True) - -roi_pool = roi_pool_layer( - input=conv, - rois=rois, - pooled_width=7, - pooled_height=7, - spatial_scale=1. / 16) - -outputs(roi_pool) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py deleted file mode 100644 index 6381a26fe8..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2560) - -row_conv = row_conv_layer(input=data, context_len=19, act=ReluActivation()) - -outputs(row_conv) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py deleted file mode 100644 index 3c17d2ccfd..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -row_l2_norm = row_l2_norm_layer(input=data) - -outputs(row_l2_norm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py deleted file mode 100644 index ae8a25ba94..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='data', size=100) - -scale = scale_shift_layer(input=data, bias_attr=False) - -scale_shift = scale_shift_layer(input=data) - -outputs(scale, scale_shift) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py deleted file mode 100644 index e4f7120bcc..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -data = data_layer(name='data', size=2016, height=48, width=42) -indices = data_layer(name='indices', size=6) - -scale_sub_region = scale_sub_region_layer( - input=data, indices=indices, value=0.0) - -outputs(scale_sub_region) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py deleted file mode 100644 index a6be069e7e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
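A one-line sketch of what scale_shift_layer in test_scale_shift_layer.py above computes, assuming a single learned scale w and an optional learned bias b applied elementwise:

    import numpy as np

    def scale_shift(x, w, b=None):
        # y = w * x + b; bias_attr=False drops the + b term
        return w * x + (b if b is not None else 0.0)

    x = np.random.randn(4, 100)
    scale_only = scale_shift(x, 0.5)            # the bias_attr=False variant
    scale_and_shift = scale_shift(x, 0.5, 0.1)  # the default variant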
- -from paddle.trainer_config_helpers import * - -settings(batch_size=1000, learning_rate=1e-5) - -din1 = data_layer(name='data1', size=30) -din2 = data_layer(name='data2', size=30) - -opts = [] -opts.append(seq_concat_layer(a=din1, b=din2)) -opts.append(seq_reshape_layer(input=din1, reshape_size=5)) - -outputs(opts) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py deleted file mode 100644 index 510ad32208..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python -#coding=utf-8 -from paddle.trainer_config_helpers import * - -input_seq = data_layer("word", size=128) -starts = data_layer("starts", size=5) -ends = data_layer("ends", size=5) - -seq_slice1 = seq_slice_layer(input=input_seq, starts=starts, ends=ends) -seq_slice2 = seq_slice_layer(input=input_seq, starts=starts, ends=None) -seq_slice3 = seq_slice_layer(input=input_seq, starts=None, ends=ends) - -outputs(seq_slice1, seq_slice2, seq_slice3) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py deleted file mode 100644 index 7b951a4cd7..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -din = data_layer(name='dat_in', size=100) - -POOL_TYPE = [MaxPooling, AvgPooling, SumPooling] - -AGG_LEVEL = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE] - -opts = [] - -for pt in POOL_TYPE: - for al in AGG_LEVEL: - opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt())) - -for pt in POOL_TYPE: - opts.append( - pooling_layer( - input=din, - agg_level=AggregateLevel.TO_NO_SEQUENCE, - pooling_type=pt(), - stride=5)) - -opts.append( - pooling_layer( - input=din, pooling_type=MaxPooling(output_max_index=True))) - -outputs(opts) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py deleted file mode 100644 index 32a4e6f6d0..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
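In NumPy terms, the pooling variants that test_sequence_pooling.py above enumerates reduce a (time, feature) sequence either to one vector or, with a stride, to one vector per window; a sketch under those assumptions:

    import numpy as np

    POOLS = {'max': np.max, 'avg': np.mean, 'sum': np.sum}

    def seq_pool(seq, kind):
        return POOLS[kind](seq, axis=0)       # TO_NO_SEQUENCE: one vector

    def seq_pool_stride(seq, kind, stride):
        # stride=5 pools every 5 consecutive steps into one output step
        return np.stack([POOLS[kind](seq[i:i + stride], axis=0)
                         for i in range(0, len(seq), stride)])

    seq = np.random.randn(10, 100)
    assert seq_pool(seq, 'max').shape == (100,)
    assert seq_pool_stride(seq, 'avg', 5).shape == (2, 100)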
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -data = data_layer(name='input', size=300) -lbl = data_layer(name='label', size=300) -smooth_l1 = smooth_l1_cost(input=data, label=lbl) - -outputs(smooth_l1) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py b/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py deleted file mode 100644 index ea68b5493e..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -define_py_data_sources2( - train_list="train.list", - test_list="test.list", - module=["a", "b"], - obj=("c", "d")) -settings(learning_rate=1e-3, batch_size=1000) - -outputs(data_layer(name="a", size=10)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py deleted file mode 100644 index 0e692d4b62..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=100, learning_rate=1e-5) - -data = data_layer(name='data', size=3200, height=20, width=10) - -spp = spp_layer( - input=data, pyramid_height=2, num_channels=16, pool_type=MaxPooling()) - -outputs(spp) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py deleted file mode 100644 index 6d1c3175ba..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -#coding=utf-8 -from paddle.trainer_config_helpers import * - -beam_size = 5 - -data = data_layer(name='input_seq', size=300) -selected_ids = data_layer(name='input', size=beam_size) -sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids) - -outputs(sub_nest_seq) diff --git a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py deleted file mode 100644 index 8878e73fff..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * -settings(batch_size=1000, learning_rate=1e-4) - -probs = data_layer(name='probs', size=100) - -outputs( - sampling_id_layer(input=probs), # It seems training is not supported - - # It seems this layer is not correct and should be rewritten. - # block_expand_layer(input=probs, channel=1, block_x=1, block_y=3), -) diff --git a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py b/python/paddle/trainer_config_helpers/tests/configs/util_layers.py deleted file mode 100644 index da134f100b..0000000000 --- a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
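A back-of-the-envelope sketch of the spp_layer output width in test_spp_layer.py above, assuming pyramid level l contributes (2**l)**2 pooled bins per channel (the usual spatial-pyramid layout):

    def spp_output_size(num_channels, pyramid_height):
        return num_channels * sum((2 ** l) ** 2 for l in range(pyramid_height))

    # For the config above: 16 channels, pyramid_height=2 -> 16 * (1 + 4) = 80
    assert spp_output_size(16, 2) == 80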
- -from paddle.trainer_config_helpers import * - -settings(learning_rate=1e-4, batch_size=1000) - -a = data_layer(name='a', size=10) -b = data_layer(name='b', size=10) - -result = addto_layer(input=[a, b]) -concat1 = concat_layer(input=[a, b]) -concat2 = concat_layer( - input=[identity_projection(input=a), identity_projection(input=b)]) - -outputs(result, concat1, concat2) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py deleted file mode 100644 index b3dd8f8fc7..0000000000 --- a/python/paddle/trainer_config_helpers/tests/layers_test.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import parse_config_and_serialize - -if __name__ == '__main__': - parse_config_and_serialize( - 'trainer_config_helpers/tests/layers_test_config.py', '') -# layers_test_config.py diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py deleted file mode 100644 index e6cd35ee76..0000000000 --- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
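util_layers.py above wires the two elementary combinators; in NumPy terms (identity_projection passes its input through unchanged, so concat2 matches concat1 in size):

    import numpy as np

    a = np.random.randn(10)
    b = np.random.randn(10)
    addto = a + b                     # addto_layer: elementwise sum, size 10
    concat = np.concatenate([a, b])   # concat_layer: sizes add up, size 20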
- -from paddle.trainer_config_helpers import * - -num_classes = 5 - -x = data_layer(name="input1", size=3) -y = data_layer(name="input2", size=5) - -z = out_prod_layer(input1=x, input2=y) - -x1 = fc_layer(input=x, size=5) -y1 = fc_layer(input=y, size=5) - -z1 = mixed_layer( - act=LinearActivation(), - input=[ - conv_operator( - img=x1, - filter=y1, - filter_size=1, - num_filters=5, - num_channels=5, - stride=1) - ]) - -assert z1.size > 0 - -y2 = fc_layer(input=y, size=15) -z2 = rotate_layer(input=y2, height=5, width=3) - -cos1 = cos_sim(a=x1, b=y1) -cos3 = cos_sim(a=x1, b=y2, size=3) - -linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3) - -out = fc_layer( - input=[cos1, cos3, linear_comb, z, z1, z2], - size=num_classes, - act=SoftmaxActivation()) - -print_layer(input=[out]) - -outputs(classification_cost(out, data_layer(name="label", size=num_classes))) - -dotmul = mixed_layer( - input=[dotmul_operator( - a=x1, b=x1), dotmul_projection(input=y1)]) - -proj_with_attr_init = mixed_layer( - input=full_matrix_projection( - input=y1, - param_attr=ParamAttr( - learning_rate=0, initial_mean=0, initial_std=0)), - bias_attr=ParamAttr( - initial_mean=0, initial_std=0, learning_rate=0), - act=LinearActivation(), - size=5, - name='proj_with_attr_init') - -# for ctc -tmp = fc_layer( - input=[x1, dotmul, proj_with_attr_init], - size=num_classes + 1, - act=SoftmaxActivation()) -ctc = ctc_layer(input=tmp, label=y, size=num_classes + 1) -ctc_eval = ctc_error_evaluator(input=tmp, label=y) - -settings( - batch_size=10, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25) diff --git a/python/paddle/trainer_config_helpers/tests/test_reset_hook.py b/python/paddle/trainer_config_helpers/tests/test_reset_hook.py deleted file mode 100644 index 4d7542c35b..0000000000 --- a/python/paddle/trainer_config_helpers/tests/test_reset_hook.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -from paddle.trainer.config_parser import parse_config - - -class TestParse(unittest.TestCase): - def test_parse(self): - a = parse_config('trainer_config_helpers/tests/layers_test_config.py', - '') - b = parse_config('trainer_config_helpers/tests/layers_test_config.py', - '') - self.assertEqual(a, b) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/trainer_config_helpers/utils.py b/python/paddle/trainer_config_helpers/utils.py deleted file mode 100644 index fe6e9cd53c..0000000000 --- a/python/paddle/trainer_config_helpers/utils.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import logger -import functools - -__all__ = ['deprecated'] - - -def deprecated(instead): - def __impl__(func): - @functools.wraps(func) - def __wrapper__(*args, **kwargs): - logger.warning("The interface %s is deprecated, " - "will be removed soon. Please use %s instead." % - (func.__name__, instead)) - - return func(*args, **kwargs) - - return __wrapper__ - - return __impl__ diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py deleted file mode 100644 index df710c33d0..0000000000 --- a/python/paddle/v2/__init__.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import optimizer -import layer -import activation -import parameters -import trainer -import event -import data_type -import topology -import networks -import evaluator -from . import dataset -from . import reader -from . import plot -import attr -import op -import pooling -import inference -import networks -import minibatch -import plot -import image -import paddle.trainer.config_parser as cp - -__all__ = [ - 'default_startup_program', - 'default_main_program', - 'optimizer', - 'layer', - 'activation', - 'parameters', - 'init', - 'trainer', - 'event', - 'data_type', - 'attr', - 'pooling', - 'dataset', - 'reader', - 'topology', - 'networks', - 'infer', - 'plot', - 'evaluator', - 'image', - 'master', -] - -cp.begin_parse() - - -def set_env_vars(trainer_count): - '''Auto set CPU environment if have not set before. - For MKL: - export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status. - export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count. - For OpenBLAS: - export OPENBLAS_NUM_THREADS, OPENBLAS_MAIN_FREE according to trainer_count. 
- ''' - import platform, paddle - if not platform.system() in ['Linux', 'Darwin']: - return - - def set_env(key, value): - '''If the key has not been set in the environment, set it with value.''' - assert isinstance(key, str) - assert isinstance(value, str) - envset = os.environ.get(key) - if envset is None: - os.environ[key] = value - - def num_physical_cores(): - '''Get the number of physical cores''' - if platform.system() == "Linux": - num_sockets = int( - os.popen("grep 'physical id' /proc/cpuinfo | sort -u | wc -l") - .read()) - num_cores_per_socket = int( - os.popen("grep 'core id' /proc/cpuinfo | sort -u | wc -l") - .read()) - return num_sockets * num_cores_per_socket - else: - cmds = {"Darwin": "sysctl -n hw.physicalcpu"} - return int(os.popen(cmds.get(platform.system(), "expr 1")).read()) - - def num_logical_processors(): - '''Get the number of logical processors''' - cmds = { - "Linux": "grep \"processor\" /proc/cpuinfo|sort -u|wc -l", - "Darwin": "sysctl -n hw.logicalcpu" - } - return int(os.popen(cmds.get(platform.system(), "expr 1")).read()) - - num_cores = num_physical_cores() - num_processors = num_logical_processors() - if paddle.version.mkl() == 'ON': - if num_processors > num_cores: # Hyper Threading is enabled - set_env("OMP_DYNAMIC", "true") - set_env("KMP_AFFINITY", "granularity=fine,compact,1,0") - else: - set_env("OMP_DYNAMIC", "false") - set_env("KMP_AFFINITY", "granularity=fine,compact,0,0") - threads = num_processors / trainer_count - threads = '1' if threads < 1 else str(threads) - if paddle.version.mkl() == 'ON': - set_env("OMP_NUM_THREADS", threads) - set_env("MKL_NUM_THREADS", threads) - else: - set_env("OPENBLAS_NUM_THREADS", threads) - if threads > 1: - set_env("OPENBLAS_MAIN_FREE", '1') - - -def init(**kwargs): - import py_paddle.swig_paddle as api - args = [] - args_dict = {} - # NOTE: append arguments if they are in ENV - for ek, ev in os.environ.iteritems(): - if ek.startswith("PADDLE_INIT_"): - args_dict[ek.replace("PADDLE_INIT_", "").lower()] = str(ev) - - args_dict.update(kwargs) - # NOTE: overwrite arguments from ENV if it is in kwargs - for key in args_dict.keys(): - args.append('--%s=%s' % (key, str(args_dict[key]))) - - set_env_vars(kwargs.get('trainer_count', 1)) - - if 'use_gpu' in kwargs: - cp.g_command_config_args['use_gpu'] = kwargs['use_gpu'] - if 'use_mkldnn' in kwargs: - cp.g_command_config_args['use_mkldnn'] = kwargs['use_mkldnn'] - if 'use_mkl_packed' in kwargs: - cp.g_command_config_args['use_mkl_packed'] = kwargs['use_mkl_packed'] - assert 'parallel_nn' not in kwargs, ("currently 'parallel_nn' is not " - "supported in v2 APIs.") - - api.initPaddle(*args) - - -infer = inference.infer -batch = minibatch.batch diff --git a/python/paddle/v2/activation.py b/python/paddle/v2/activation.py deleted file mode 100644 index 21261a1782..0000000000 --- a/python/paddle/v2/activation.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
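A hypothetical illustration of how init() above folds PADDLE_INIT_* environment variables into command-line flags; the variable name is made up, and any PADDLE_INIT_ key is lowercased and forwarded:

    import os

    os.environ['PADDLE_INIT_USE_GPU'] = 'False'
    # init(trainer_count=1) would then pass '--use_gpu=False' (plus any
    # kwargs, which override the environment) to api.initPaddle.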
- -import paddle.trainer_config_helpers.activations -import copy - -__all__ = [] - -suffix = 'Activation' -for act in paddle.trainer_config_helpers.activations.__all__: - new_name = act[:-len(suffix)] - globals()[new_name] = copy.copy( - getattr(paddle.trainer_config_helpers.activations, act)) - globals()[new_name].__name__ = new_name - __all__.append(new_name) diff --git a/python/paddle/v2/attr.py b/python/paddle/v2/attr.py deleted file mode 100644 index 5d23894d73..0000000000 --- a/python/paddle/v2/attr.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.attrs - -__all__ = [ - "Param", - "Extra", - "Hook", -] - -Param = paddle.trainer_config_helpers.attrs.ParameterAttribute -Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute -Hook = paddle.trainer_config_helpers.attrs.HookAttribute - -for each in paddle.trainer_config_helpers.attrs.__all__: - globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each) - __all__.append(each) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py deleted file mode 100644 index d9613e001a..0000000000 --- a/python/paddle/v2/config_base.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import collections -import re -import paddle.trainer_config_helpers as conf_helps - -__layer_map__ = {} - - -def __map_docstr__(doc, name): - if doc is None: - return doc - - assert isinstance(doc, basestring) - - # replace LayerOutput to paddle.v2.config_base.Layer - doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer") - - doc = doc.replace('ParameterAttribute', 'paddle.v2.attr.ParameterAttribute') - - doc = re.sub(r'ExtraLayerAttribute[^\s]?', 'paddle.v2.attr.ExtraAttribute', - doc) - - # xxx_layer to xxx - doc = re.sub(r"(?P[a-z]+)_layer", r"\g", doc) - - # XxxxActivation to paddle.v2.activation.Xxxx - doc = re.sub(r"(?P[A-Z][a-zA-Z]+)Activation", - r"paddle.v2.activation.\g", doc) - - # xxx_evaluator to paddle.v2.evaluator.xxx - doc = re.sub(r"(?P[a-z]+)_evaluator", r"evaluator.\g", doc) - - # TODO(yuyang18): Add more rules if needed. 
- return doc - - -def __convert_to_v2__(f, name, module): - def wrapped(*args, **xargs): - out = f(*args, **xargs) - outs = out - if not isinstance(out, collections.Sequence): - outs = [out] - for l in outs: - if isinstance(l, conf_helps.LayerOutput): - __layer_map__[l.full_name] = l - return out - - wrapped.__doc__ = __map_docstr__(f.__doc__, name) - wrapped.__name__ = name - wrapped.__module__ = module - - return wrapped - - -Layer = conf_helps.LayerOutput diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py deleted file mode 100644 index 98dfb85a0e..0000000000 --- a/python/paddle/v2/data_feeder.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from py_paddle import DataProviderConverter -import collections -import paddle.trainer.PyDataProvider2 as pydp2 - -__all__ = ['DataFeeder'] - - -def default_feeding_map(data_types): - reader_dict = dict() - for i, tp in enumerate(data_types): - reader_dict[tp[0]] = i - return reader_dict - - -class DataFeeder(DataProviderConverter): - """ - DataFeeder converts the data returned by paddle.reader into a data structure - of Arguments which is defined in the API. The paddle.reader usually returns - a list of mini-batch data entries. Each data entry in the list is one sample. - Each sample is a list or a tuple with one feature or multiple features. - DataFeeder converts this mini-batch data entries into Arguments in order - to feed it to C++ interface. - - The simple usage shows below - - .. code-block:: python - - feeding = ['image', 'label'] - data_types = enumerate_data_types_of_data_layers(topology) - feeder = DataFeeder(data_types=data_types, feeding=feeding) - - minibatch_data = [([1.0, 2.0, 3.0, ...], 5)] - - arg = feeder(minibatch_data) - - - If mini-batch data and data layers are not one to one mapping, we - could pass a dictionary to feeding parameter to represent the mapping - relationship. - - - .. code-block:: python - - data_types = [('image', paddle.data_type.dense_vector(784)), - ('label', paddle.data_type.integer_value(10))] - feeding = {'image':0, 'label':1} - feeder = DataFeeder(data_types=data_types, feeding=feeding) - minibatch_data = [ - ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ), # first sample - ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ) # second sample - ] - # or minibatch_data = [ - # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ], # first sample - # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample - # ] - arg = feeder.convert(minibatch_data) - - .. note:: - - This module is for internal use only. Users should use the `reader` - interface. - - - - :param data_types: A list to specify data name and type. Each item is - a tuple of (data_name, data_type). - - :type data_types: list - :param feeding: A dictionary or a sequence to specify the position of each - data in the input data. 
- :type feeding: dict|collections.Sequence|None - """ - - def __init__(self, data_types, feeding=None): - self.input_names = [] - input_types = [] - if feeding is None: - feeding = default_feeding_map(data_types) - elif isinstance(feeding, collections.Sequence): - feed_list = feeding - feeding = dict() - for i, name in enumerate(feed_list): - feeding[name] = i - elif not isinstance(feeding, dict): - raise TypeError("Feeding should be dict or sequence or None.") - - self.feeding = feeding - for each in data_types: - self.input_names.append(each[0]) - if not isinstance(each[1], pydp2.InputType): - raise TypeError("second item in each data_type should be an " - "InputType") - input_types.append(each[1]) - DataProviderConverter.__init__(self, input_types) - - def __len__(self): - return len(self.input_names) - - def convert(self, dat, argument=None): - """ - :param dat: A list of mini-batch data. Each sample is a list or tuple - with one feature or multiple features. - - :type dat: list - :param argument: An Arguments object that contains this mini-batch data with - one or multiple features. The Arguments definition is - in the API. - :type argument: py_paddle.swig_paddle.Arguments - """ - - def reorder_data(data): - retv = [] - for each in data: - reorder = [] - for name in self.input_names: - reorder.append(each[self.feeding[name]]) - retv.append(reorder) - return retv - - return DataProviderConverter.convert(self, reorder_data(dat), argument) diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py deleted file mode 100644 index 226997465f..0000000000 --- a/python/paddle/v2/data_type.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer.PyDataProvider2 as pydp2 - -import_list = [ - nm for nm in dir(pydp2) - if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm or - 'array' in nm) -] -import_list.extend(['InputType']) - -for nm in import_list: - globals()[nm] = getattr(pydp2, nm) - -__all__ = import_list diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py deleted file mode 100644 index 38056fe0a9..0000000000 --- a/python/paddle/v2/dataset/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Dataset package.
-""" - -import mnist -import imikolov -import imdb -import cifar -import movielens -import conll05 -import uci_housing -import sentiment -import wmt14 -import wmt16 -import mq2007 -import flowers -import voc2012 - -__all__ = [ - 'mnist', - 'imikolov', - 'imdb', - 'cifar', - 'movielens', - 'conll05', - 'sentiment', - 'uci_housing', - 'wmt14', - 'wmt16', - 'mq2007', - 'flowers', - 'voc2012', -] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py deleted file mode 100644 index 662655c836..0000000000 --- a/python/paddle/v2/dataset/cifar.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -CIFAR dataset. - -This module will download dataset from -https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into -paddle reader creators. - -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, -with 6000 images per class. There are 50000 training images and 10000 test -images. - -The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes -containing 600 images each. There are 500 training images and 100 testing -images per class. - -""" - -import cPickle -import itertools -import numpy -import paddle.v2.dataset.common -import tarfile - -__all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] - -URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' -CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' -CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' -CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz' -CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' - - -def reader_creator(filename, sub_name, cycle=False): - def read_batch(batch): - data = batch['data'] - labels = batch.get('labels', batch.get('fine_labels', None)) - assert labels is not None - for sample, label in itertools.izip(data, labels): - yield (sample / 255.0).astype(numpy.float32), int(label) - - def reader(): - with tarfile.open(filename, mode='r') as f: - names = (each_item.name for each_item in f - if sub_name in each_item.name) - - while True: - for name in names: - batch = cPickle.load(f.extractfile(name)) - for item in read_batch(batch): - yield item - if not cycle: - break - - return reader - - -def train100(): - """ - CIFAR-100 training set creator. - - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 99]. - - :return: Training reader creator - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), - 'train') - - -def test100(): - """ - CIFAR-100 test set creator. - - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - - :return: Test reader creator. - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), - 'test') - - -def train10(cycle=False): - """ - CIFAR-10 training set creator. 
- - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - - :param cycle: whether to cycle through the dataset - :type cycle: bool - :return: Training reader creator - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'data_batch', - cycle=cycle) - - -def test10(cycle=False): - """ - CIFAR-10 test set creator. - - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - - :param cycle: whether to cycle through the dataset - :type cycle: bool - :return: Test reader creator. - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'test_batch', - cycle=cycle) - - -def fetch(): - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, train100(), 1000, "cifar_train100") - paddle.v2.dataset.common.convert(path, test100(), 1000, "cifar_test100") - paddle.v2.dataset.common.convert(path, train10(), 1000, "cifar_train10") - paddle.v2.dataset.common.convert(path, test10(), 1000, "cifar_test10") diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py deleted file mode 100644 index c6ff09a1d1..0000000000 --- a/python/paddle/v2/dataset/common.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import requests -import hashlib -import os -import errno -import shutil -import sys -import importlib -import paddle.v2.dataset -import cPickle -import glob -import cPickle as pickle - -__all__ = [ - 'DATA_HOME', - 'download', - 'md5file', - 'split', - 'cluster_files_reader', - 'convert', -] - -DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') - - -# When running unit tests, there could be multiple processes that -# trying to create DATA_HOME directory simultaneously, so we cannot -# use a if condition to check for the existence of the directory; -# instead, we use the filesystem as the synchronization mechanism by -# catching returned errors. 
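A hypothetical consumption loop for the CIFAR readers above; per the docstrings, each sample is a 3072-float image scaled to [0, 1] plus an integer label:

    from paddle.v2.dataset.cifar import train10

    for i, (image, label) in enumerate(train10()()):
        assert image.shape == (3072,) and 0 <= label <= 9
        if i == 2:    # peek at the first few samples only
            break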
-def must_mkdirs(path): - try: - os.makedirs(path) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - pass - - -must_mkdirs(DATA_HOME) - - -def md5file(fname): - hash_md5 = hashlib.md5() - f = open(fname, "rb") - for chunk in iter(lambda: f.read(4096), b""): - hash_md5.update(chunk) - f.close() - return hash_md5.hexdigest() - - -def download(url, module_name, md5sum, save_name=None): - dirname = os.path.join(DATA_HOME, module_name) - if not os.path.exists(dirname): - os.makedirs(dirname) - - filename = os.path.join(dirname, - url.split('/')[-1] - if save_name is None else save_name) - - retry = 0 - retry_limit = 3 - while not (os.path.exists(filename) and md5file(filename) == md5sum): - if os.path.exists(filename): - print "file md5", md5file(filename), md5sum - if retry < retry_limit: - retry += 1 - else: - raise RuntimeError("Cannot download {0} within retry limit {1}". - format(url, retry_limit)) - print "Cache file %s not found, downloading %s" % (filename, url) - r = requests.get(url, stream=True) - total_length = r.headers.get('content-length') - - if total_length is None: - with open(filename, 'w') as f: - shutil.copyfileobj(r.raw, f) - else: - with open(filename, 'w') as f: - dl = 0 - total_length = int(total_length) - for data in r.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - done = int(50 * dl / total_length) - sys.stdout.write("\r[%s%s]" % ('=' * done, - ' ' * (50 - done))) - sys.stdout.flush() - - return filename - - -def fetch_all(): - for module_name in filter(lambda x: not x.startswith("__"), - dir(paddle.v2.dataset)): - if "fetch" in dir( - importlib.import_module("paddle.v2.dataset.%s" % module_name)): - getattr( - importlib.import_module("paddle.v2.dataset.%s" % module_name), - "fetch")() - - -def fetch_all_recordio(path): - for module_name in filter(lambda x: not x.startswith("__"), - dir(paddle.v2.dataset)): - if "convert" in dir( - importlib.import_module("paddle.v2.dataset.%s" % module_name)) and \ - not module_name == "common": - ds_path = os.path.join(path, module_name) - must_mkdirs(ds_path) - getattr( - importlib.import_module("paddle.v2.dataset.%s" % module_name), - "convert")(ds_path) - - -def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump): - """ - you can call the function as: - - split(paddle.v2.dataset.cifar.train10(), line_count=1000, - suffix="imikolov-train-%05d.pickle") - - the output files are: - - |-imikolov-train-00000.pickle - |-imikolov-train-00001.pickle - |- ... - |-imikolov-train-00480.pickle - - :param reader: is a reader creator - :param line_count: line count for each file - :param suffix: the suffix for the output files, should contain "%d", which - means the id of each file. Default is "%05d.pickle" - :param dumper: is a callable function that dumps an object to a file, this - function will be called as dumper(obj, f) and obj is the object - that will be dumped, f is a file object. Default is cPickle.dump.
- """ - if not callable(dumper): - raise TypeError("dumper should be callable.") - lines = [] - indx_f = 0 - for i, d in enumerate(reader()): - lines.append(d) - if i >= line_count and i % line_count == 0: - with open(suffix % indx_f, "w") as f: - dumper(lines, f) - lines = [] - indx_f += 1 - if lines: - with open(suffix % indx_f, "w") as f: - dumper(lines, f) - - -def cluster_files_reader(files_pattern, - trainer_count, - trainer_id, - loader=cPickle.load): - """ - Create a reader that yield element from the given files, select - a file set according trainer count and trainer_id - - :param files_pattern: the files which generating by split(...) - :param trainer_count: total trainer count - :param trainer_id: the trainer rank id - :param loader: is a callable function that load object from file, this - function will be called as loader(f) and f is a file object. - Default is cPickle.load - """ - - def reader(): - if not callable(loader): - raise TypeError("loader should be callable.") - file_list = glob.glob(files_pattern) - file_list.sort() - my_file_list = [] - for idx, fn in enumerate(file_list): - if idx % trainer_count == trainer_id: - print "append file: %s" % fn - my_file_list.append(fn) - for fn in my_file_list: - with open(fn, "r") as f: - lines = loader(f) - for line in lines: - yield line - - return reader - - -def convert(output_path, reader, line_count, name_prefix): - import recordio - """ - Convert data from reader to recordio format files. - - :param output_path: directory in which output files will be saved. - :param reader: a data reader, from which the convert program will read - data instances. - :param name_prefix: the name prefix of generated files. - :param max_lines_to_shuffle: the max lines numbers to shuffle before - writing. - """ - - assert line_count >= 1 - indx_f = 0 - - def write_data(indx_f, lines): - filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f) - writer = recordio.writer(filename) - for l in lines: - # FIXME(Yancey1989): - # dumps with protocol: pickle.HIGHEST_PROTOCOL - writer.write(cPickle.dumps(l)) - writer.close() - - lines = [] - for i, d in enumerate(reader()): - lines.append(d) - if i % line_count == 0 and i >= line_count: - write_data(indx_f, lines) - lines = [] - indx_f += 1 - continue - - write_data(indx_f, lines) diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py deleted file mode 100644 index 8312900dc4..0000000000 --- a/python/paddle/v2/dataset/conll05.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Conll05 dataset. -Paddle semantic role labeling Book and demo use this dataset as an example. -Because Conll05 is not free in public, the default downloaded URL is test set -of Conll05 (which is public). Users can change URL and MD5 to their Conll -dataset. And a pre-trained word vector model based on Wikipedia corpus is used -to initialize SRL model. 
-""" - -import tarfile -import gzip -import itertools -import paddle.v2.dataset.common - -__all__ = ['test, get_dict', 'get_embedding', 'convert'] - -DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' -DATA_MD5 = '387719152ae52d60422c016e92a742fc' -WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt' -WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' -VERBDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FverbDict.txt' -VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c' -TRGDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FtargetDict.txt' -TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751' -EMB_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2Femb' -EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' - -UNK_IDX = 0 - - -def load_label_dict(filename): - d = dict() - tag_dict = set() - with open(filename, 'r') as f: - for i, line in enumerate(f): - line = line.strip() - if line.startswith("B-"): - tag_dict.add(line[2:]) - elif line.startswith("I-"): - tag_dict.add(line[2:]) - index = 0 - for tag in tag_dict: - d["B-" + tag] = index - index += 1 - d["I-" + tag] = index - index += 1 - d["O"] = index - return d - - -def load_dict(filename): - d = dict() - with open(filename, 'r') as f: - for i, line in enumerate(f): - d[line.strip()] = i - return d - - -def corpus_reader(data_path, words_name, props_name): - """ - Read one corpus. It returns an iterator. Each element of - this iterator is a tuple including sentence and labels. The sentence is - consist of a list of word IDs. The labels include a list of label IDs. - :return: a iterator of data. - :rtype: iterator - """ - - def reader(): - tf = tarfile.open(data_path) - wf = tf.extractfile(words_name) - pf = tf.extractfile(props_name) - with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile( - fileobj=pf) as props_file: - sentences = [] - labels = [] - one_seg = [] - for word, label in itertools.izip(words_file, props_file): - word = word.strip() - label = label.strip().split() - - if len(label) == 0: # end of sentence - for i in xrange(len(one_seg[0])): - a_kind_lable = [x[i] for x in one_seg] - labels.append(a_kind_lable) - - if len(labels) >= 1: - verb_list = [] - for x in labels[0]: - if x != '-': - verb_list.append(x) - - for i, lbl in enumerate(labels[1:]): - cur_tag = 'O' - is_in_bracket = False - lbl_seq = [] - verb_word = '' - for l in lbl: - if l == '*' and is_in_bracket == False: - lbl_seq.append('O') - elif l == '*' and is_in_bracket == True: - lbl_seq.append('I-' + cur_tag) - elif l == '*)': - lbl_seq.append('I-' + cur_tag) - is_in_bracket = False - elif l.find('(') != -1 and l.find(')') != -1: - cur_tag = l[1:l.find('*')] - lbl_seq.append('B-' + cur_tag) - is_in_bracket = False - elif l.find('(') != -1 and l.find(')') == -1: - cur_tag = l[1:l.find('*')] - lbl_seq.append('B-' + cur_tag) - is_in_bracket = True - else: - raise RuntimeError('Unexpected label: %s' % - l) - - yield sentences, verb_list[i], lbl_seq - - sentences = [] - labels = [] - one_seg = [] - else: - sentences.append(word) - one_seg.append(label) - - pf.close() - wf.close() - tf.close() - - return reader - - -def reader_creator(corpus_reader, - word_dict=None, - predicate_dict=None, - label_dict=None): - def reader(): - for sentence, predicate, labels in corpus_reader(): - - sen_len = len(sentence) - - verb_index = labels.index('B-V') - mark = [0] * len(labels) - if verb_index > 0: - mark[verb_index - 1] = 1 - ctx_n1 = sentence[verb_index - 1] - else: - ctx_n1 = 'bos' - - if verb_index > 1: - mark[verb_index - 2] = 
1 - ctx_n2 = sentence[verb_index - 2] - else: - ctx_n2 = 'bos' - - mark[verb_index] = 1 - ctx_0 = sentence[verb_index] - - if verb_index < len(labels) - 1: - mark[verb_index + 1] = 1 - ctx_p1 = sentence[verb_index + 1] - else: - ctx_p1 = 'eos' - - if verb_index < len(labels) - 2: - mark[verb_index + 2] = 1 - ctx_p2 = sentence[verb_index + 2] - else: - ctx_p2 = 'eos' - - word_idx = [word_dict.get(w, UNK_IDX) for w in sentence] - - ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len - - pred_idx = [predicate_dict.get(predicate)] * sen_len - label_idx = [label_dict.get(w) for w in labels] - - yield word_idx, ctx_n2_idx, ctx_n1_idx, \ - ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx - - return reader - - -def get_dict(): - """ - Get the word, verb and label dictionary of Wikipedia corpus. - """ - word_dict = load_dict( - paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', - WORDDICT_MD5)) - verb_dict = load_dict( - paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', - VERBDICT_MD5)) - label_dict = load_label_dict( - paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', - TRGDICT_MD5)) - return word_dict, verb_dict, label_dict - - -def get_embedding(): - """ - Get the trained word vector based on Wikipedia corpus. - """ - return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) - - -def test(): - """ - Conll05 test set creator. - - Because the training dataset is not free, the test dataset is used for - training. It returns a reader creator, each sample in the reader is nine - features, including sentence sequence, predicate, predicate context, - predicate context flag and tagged sequence. - - :return: Training reader creator - :rtype: callable - """ - word_dict, verb_dict, label_dict = get_dict() - reader = corpus_reader( - paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), - words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', - props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') - return reader_creator(reader, word_dict, verb_dict, label_dict) - - -def fetch(): - paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) - paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) - paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) - paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) - paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_test") diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py deleted file mode 100644 index db12076d54..0000000000 --- a/python/paddle/v2/dataset/flowers.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
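# A minimal sketch of the predicate-context windowing used by the conll05
# reader_creator above (plain Python; the sentence and index here are
# illustrative only). Words that fall outside the sentence are padded with
# the same 'bos'/'eos' markers the reader uses:
def context_window(sentence, verb_index):
    get = lambda i, pad: sentence[i] if 0 <= i < len(sentence) else pad
    return [get(verb_index + d, 'bos' if d < 0 else 'eos')
            for d in (-2, -1, 0, 1, 2)]

# context_window(['He', 'quickly', 'ate', 'the', 'cake'], 1)
# -> ['bos', 'He', 'quickly', 'ate', 'the']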
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This module will download dataset from -http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html -and parse train/test set intopaddle reader creators. - -This set contains images of flowers belonging to 102 different categories. -The images were acquired by searching the web and taking pictures. There are a -minimum of 40 images for each category. - -The database was used in: - -Nilsback, M-E. and Zisserman, A. Automated flower classification over a large - number of classes.Proceedings of the Indian Conference on Computer Vision, -Graphics and Image Processing (2008) -http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}. - -""" -import cPickle -import itertools -import functools -from common import download -import tarfile -import scipy.io as scio -from paddle.v2.image import * -from paddle.v2.reader import * -import os -import numpy as np -from multiprocessing import cpu_count -__all__ = ['train', 'test', 'valid'] - -DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz' -LABEL_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat' -SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat' -DATA_MD5 = '33bfc11892f1e405ca193ae9a9f2a118' -LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d' -SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c' -# In official 'readme', tstid is the flag of test data -# and trnid is the flag of train data. But test data is more than train data. -# So we exchange the train data and test data. -TRAIN_FLAG = 'tstid' -TEST_FLAG = 'trnid' -VALID_FLAG = 'valid' - - -def default_mapper(is_train, sample): - ''' - map image bytes data to type needed by model input layer - ''' - img, label = sample - img = load_image_bytes(img) - img = simple_transform( - img, 256, 224, is_train, mean=[103.94, 116.78, 123.68]) - return img.flatten().astype('float32'), label - - -train_mapper = functools.partial(default_mapper, True) -test_mapper = functools.partial(default_mapper, False) - - -def reader_creator(data_file, - label_file, - setid_file, - dataset_name, - mapper, - buffered_size=1024, - use_xmap=True, - cycle=False): - ''' - 1. read images from tar file and - merge images into batch files in 102flowers.tgz_batch/ - 2. 
get a reader to read sample from batch file - - :param data_file: downloaded data file - :type data_file: string - :param label_file: downloaded label file - :type label_file: string - :param setid_file: downloaded setid file containing information - about how to split dataset - :type setid_file: string - :param dataset_name: data set name (tstid|trnid|valid) - :type dataset_name: string - :param mapper: a function to map image bytes data to type - needed by model input layer - :type mapper: callable - :param buffered_size: the size of buffer used to process images - :type buffered_size: int - :param cycle: whether to cycle through the dataset - :type cycle: bool - :return: data reader - :rtype: callable - ''' - labels = scio.loadmat(label_file)['labels'][0] - indexes = scio.loadmat(setid_file)[dataset_name][0] - img2label = {} - for i in indexes: - img = "jpg/image_%05d.jpg" % i - img2label[img] = labels[i - 1] - file_list = batch_images_from_tar(data_file, dataset_name, img2label) - - def reader(): - while True: - for file in open(file_list): - file = file.strip() - batch = None - with open(file, 'r') as f: - batch = cPickle.load(f) - data = batch['data'] - labels = batch['label'] - for sample, label in itertools.izip(data, batch['label']): - yield sample, int(label) - 1 - if not cycle: - break - - if use_xmap: - cpu_num = int(os.environ.get('CPU_NUM', cpu_count())) - return xmap_readers(mapper, reader, cpu_num, buffered_size) - else: - return map_readers(mapper, reader) - - -def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False): - ''' - Create flowers training set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] - translated from original color image by steps: - 1. resize to 256*256 - 2. random crop to 224*224 - 3. flatten - :param mapper: a function to map sample. - :type mapper: callable - :param buffered_size: the size of buffer used to process images - :type buffered_size: int - :param cycle: whether to cycle through the dataset - :type cycle: bool - :return: train data reader - :rtype: callable - ''' - return reader_creator( - download(DATA_URL, 'flowers', DATA_MD5), - download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), - TRAIN_FLAG, - mapper, - buffered_size, - use_xmap, - cycle=cycle) - - -def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False): - ''' - Create flowers test set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] - translated from original color image by steps: - 1. resize to 256*256 - 2. random crop to 224*224 - 3. flatten - :param mapper: a function to map sample. - :type mapper: callable - :param buffered_size: the size of buffer used to process images - :type buffered_size: int - :param cycle: whether to cycle through the dataset - :type cycle: bool - :return: test data reader - :rtype: callable - ''' - return reader_creator( - download(DATA_URL, 'flowers', DATA_MD5), - download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), - TEST_FLAG, - mapper, - buffered_size, - use_xmap, - cycle=cycle) - - -def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True): - ''' - Create flowers validation set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] - translated from original color image by steps: - 1. resize to 256*256 - 2. random crop to 224*224 - 3. 
flatten - :param mapper: a function to map sample. - :type mapper: callable - :param buffered_size: the size of buffer used to process images - :type buffered_size: int - :return: test data reader - :rtype: callable - ''' - return reader_creator( - download(DATA_URL, 'flowers', DATA_MD5), - download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), VALID_FLAG, mapper, - buffered_size, use_xmap) - - -def fetch(): - download(DATA_URL, 'flowers', DATA_MD5) - download(LABEL_URL, 'flowers', LABEL_MD5) - download(SETID_URL, 'flowers', SETID_MD5) diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py deleted file mode 100644 index 00c2a3b992..0000000000 --- a/python/paddle/v2/dataset/imdb.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -IMDB dataset. - -This module downloads IMDB dataset from -http://ai.stanford.edu/%7Eamaas/data/sentiment/. This dataset contains a set -of 25,000 highly polar movie reviews for training, and 25,000 for testing. -Besides, this module also provides API for building dictionary. -""" - -import paddle.v2.dataset.common -import collections -import tarfile -import re -import string - -__all__ = ['build_dict', 'train', 'test', 'convert'] - -URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' -MD5 = '7c2ac02c03563afcf9b574c7e56c153a' - - -def tokenize(pattern): - """ - Read files that match the given pattern. Tokenize and yield each file. - """ - - with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb', - MD5)) as tarf: - # Note that we should use tarfile.next(), which does - # sequential access of member files, other than - # tarfile.extractfile, which does random access and might - # destroy hard disks. - tf = tarf.next() - while tf != None: - if bool(pattern.match(tf.name)): - # newline and punctuations removal and ad-hoc tokenization. - yield tarf.extractfile(tf).read().rstrip("\n\r").translate( - None, string.punctuation).lower().split() - tf = tarf.next() - - -def build_dict(pattern, cutoff): - """ - Build a word dictionary from the corpus. Keys of the dictionary are words, - and values are zero-based IDs of these words. - """ - word_freq = collections.defaultdict(int) - for doc in tokenize(pattern): - for word in doc: - word_freq[word] += 1 - - # Not sure if we should prune less-frequent words here. 
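    # Keep only words whose frequency exceeds `cutoff`; the survivors are
    # ranked by descending frequency (ties broken alphabetically) and the
    # rank becomes the word ID, with the last ID reserved for
    # out-of-vocabulary words.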
- word_freq = filter(lambda x: x[1] > cutoff, word_freq.items()) - - dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) - words, _ = list(zip(*dictionary)) - word_idx = dict(zip(words, xrange(len(words)))) - word_idx[''] = len(words) - return word_idx - - -def reader_creator(pos_pattern, neg_pattern, word_idx): - UNK = word_idx[''] - INS = [] - - def load(pattern, out, label): - for doc in tokenize(pattern): - out.append(([word_idx.get(w, UNK) for w in doc], label)) - - load(pos_pattern, INS, 0) - load(neg_pattern, INS, 1) - - def reader(): - for doc, label in INS: - yield doc, label - - return reader - - -def train(word_idx): - """ - IMDB training set creator. - - It returns a reader creator, each sample in the reader is an zero-based ID - sequence and label in [0, 1]. - - :param word_idx: word dictionary - :type word_idx: dict - :return: Training reader creator - :rtype: callable - """ - return reader_creator( - re.compile("aclImdb/train/pos/.*\.txt$"), - re.compile("aclImdb/train/neg/.*\.txt$"), word_idx) - - -def test(word_idx): - """ - IMDB test set creator. - - It returns a reader creator, each sample in the reader is an zero-based ID - sequence and label in [0, 1]. - - :param word_idx: word dictionary - :type word_idx: dict - :return: Test reader creator - :rtype: callable - """ - return reader_creator( - re.compile("aclImdb/test/pos/.*\.txt$"), - re.compile("aclImdb/test/neg/.*\.txt$"), word_idx) - - -def word_dict(cutoff=150): - """ - Build a word dictionary from the corpus. - - :return: Word dictionary - :rtype: dict - """ - return build_dict( - re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), cutoff) - - -def fetch(): - paddle.v2.dataset.common.download(URL, 'imdb', MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - w = word_dict() - paddle.v2.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train") - paddle.v2.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py deleted file mode 100644 index 617c722c41..0000000000 --- a/python/paddle/v2/dataset/imikolov.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -imikolov's simple dataset. - -This module will download dataset from -http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set -into paddle reader creators. 
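
A sketch of typical usage (hypothetical, for illustration):

    import paddle.v2.dataset.imikolov as imikolov

    word_dict = imikolov.build_dict()
    # 5-gram mode: each sample is a tuple of five word IDs
    for sample in imikolov.train(word_dict, 5)():
        print(sample)
        break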
-""" -import paddle.v2.dataset.common -import collections -import tarfile - -__all__ = ['train', 'test', 'build_dict', 'convert'] - -URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' -MD5 = '30177ea32e27c525793142b6bf2c8e2d' - - -class DataType(object): - NGRAM = 1 - SEQ = 2 - - -def word_count(f, word_freq=None): - if word_freq is None: - word_freq = collections.defaultdict(int) - - for l in f: - for w in l.strip().split(): - word_freq[w] += 1 - word_freq[''] += 1 - word_freq[''] += 1 - - return word_freq - - -def build_dict(min_word_freq=50): - """ - Build a word dictionary from the corpus, Keys of the dictionary are words, - and values are zero-based IDs of these words. - """ - train_filename = './simple-examples/data/ptb.train.txt' - test_filename = './simple-examples/data/ptb.valid.txt' - with tarfile.open( - paddle.v2.dataset.common.download( - paddle.v2.dataset.imikolov.URL, 'imikolov', - paddle.v2.dataset.imikolov.MD5)) as tf: - trainf = tf.extractfile(train_filename) - testf = tf.extractfile(test_filename) - word_freq = word_count(testf, word_count(trainf)) - if '' in word_freq: - # remove for now, since we will set it as last index - del word_freq[''] - - word_freq = filter(lambda x: x[1] > min_word_freq, word_freq.items()) - - word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) - words, _ = list(zip(*word_freq_sorted)) - word_idx = dict(zip(words, xrange(len(words)))) - word_idx[''] = len(words) - - return word_idx - - -def reader_creator(filename, word_idx, n, data_type): - def reader(): - with tarfile.open( - paddle.v2.dataset.common.download( - paddle.v2.dataset.imikolov.URL, 'imikolov', - paddle.v2.dataset.imikolov.MD5)) as tf: - f = tf.extractfile(filename) - - UNK = word_idx[''] - for l in f: - if DataType.NGRAM == data_type: - assert n > -1, 'Invalid gram length' - l = [''] + l.strip().split() + [''] - if len(l) >= n: - l = [word_idx.get(w, UNK) for w in l] - for i in range(n, len(l) + 1): - yield tuple(l[i - n:i]) - elif DataType.SEQ == data_type: - l = l.strip().split() - l = [word_idx.get(w, UNK) for w in l] - src_seq = [word_idx['']] + l - trg_seq = l + [word_idx['']] - if n > 0 and len(src_seq) > n: continue - yield src_seq, trg_seq - else: - assert False, 'Unknow data type' - - return reader - - -def train(word_idx, n, data_type=DataType.NGRAM): - """ - imikolov training set creator. - - It returns a reader creator, each sample in the reader is a word ID - tuple. - - :param word_idx: word dictionary - :type word_idx: dict - :param n: sliding window size if type is ngram, otherwise max length of sequence - :type n: int - :param data_type: data type (ngram or sequence) - :type data_type: member variable of DataType (NGRAM or SEQ) - :return: Training reader creator - :rtype: callable - """ - return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n, - data_type) - - -def test(word_idx, n, data_type=DataType.NGRAM): - """ - imikolov test set creator. - - It returns a reader creator, each sample in the reader is a word ID - tuple. 
- - :param word_idx: word dictionary - :type word_idx: dict - :param n: sliding window size if type is ngram, otherwise max length of sequence - :type n: int - :param data_type: data type (ngram or sequence) - :type data_type: member variable of DataType (NGRAM or SEQ) - :return: Test reader creator - :rtype: callable - """ - return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n, - data_type) - - -def fetch(): - paddle.v2.dataset.common.download(URL, "imikolov", MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - N = 5 - word_dict = build_dict() - paddle.v2.dataset.common.convert(path, - train(word_dict, N), 1000, - "imikolov_train") - paddle.v2.dataset.common.convert(path, - test(word_dict, N), 1000, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py deleted file mode 100644 index 026cf501cf..0000000000 --- a/python/paddle/v2/dataset/mnist.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -MNIST dataset. - -This module will download dataset from http://yann.lecun.com/exdb/mnist/ and -parse training set and test set into paddle reader creators. -""" -import paddle.v2.dataset.common -import subprocess -import numpy -import platform -__all__ = ['train', 'test', 'convert'] - -URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' -TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' -TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3' -TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz' -TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c' -TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz' -TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873' -TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz' -TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432' - - -def reader_creator(image_filename, label_filename, buffer_size): - def reader(): - if platform.system() == 'Darwin': - zcat_cmd = 'gzcat' - elif platform.system() == 'Linux': - zcat_cmd = 'zcat' - else: - raise NotImplementedError() - - # According to http://stackoverflow.com/a/38061619/724872, we - # cannot use standard package gzip here. - m = subprocess.Popen([zcat_cmd, image_filename], stdout=subprocess.PIPE) - m.stdout.read(16) # skip some magic bytes - - l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE) - l.stdout.read(8) # skip some magic bytes - - try: # reader could be break. - while True: - labels = numpy.fromfile( - l.stdout, 'ubyte', count=buffer_size).astype("int") - - if labels.size != buffer_size: - break # numpy.fromfile returns empty slice after EOF. 
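                # Each pass reads `buffer_size` labels plus the matching
                # block of 28x28 images; pixels are rescaled from [0, 255]
                # to [-1, 1] before (image_vector, label) pairs are yielded.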
- - images = numpy.fromfile( - m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape( - (buffer_size, 28 * 28)).astype('float32') - - images = images / 255.0 * 2.0 - 1.0 - - for i in xrange(buffer_size): - yield images[i, :], int(labels[i]) - finally: - try: - m.terminate() - except: - pass - try: - l.terminate() - except: - pass - - return reader - - -def train(): - """ - MNIST training set creator. - - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - - :return: Training reader creator - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', - TRAIN_IMAGE_MD5), - paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', - TRAIN_LABEL_MD5), 100) - - -def test(): - """ - MNIST test set creator. - - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - - :return: Test reader creator. - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', - TEST_IMAGE_MD5), - paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', - TEST_LABEL_MD5), 100) - - -def fetch(): - paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) - paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) - paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) - paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, train(), 1000, "minist_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "minist_test") diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py deleted file mode 100644 index 5b61a9420a..0000000000 --- a/python/paddle/v2/dataset/movielens.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Movielens 1-M dataset. - -Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000 -movies, which was collected by GroupLens Research. This module will download -Movielens 1-M dataset from -http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training -set and test set into paddle reader creators. - -""" - -import zipfile -import paddle.v2.dataset.common -import re -import random -import functools - -__all__ = [ - 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', - 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info', - 'convert' -] - -age_table = [1, 18, 25, 35, 45, 50, 56] - -URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip' -MD5 = 'c4d9eecfca2ab87c1945afe126590906' - - -class MovieInfo(object): - """ - Movie id, title and categories information are stored in MovieInfo. 
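    For illustration (the IDs are hypothetical), a movies.dat line such as
    "1::Toy Story (1995)::Animation|Children's|Comedy" becomes a MovieInfo
    whose value() is [1, [category IDs...], [title word IDs...]].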
- """ - - def __init__(self, index, categories, title): - self.index = int(index) - self.categories = categories - self.title = title - - def value(self): - """ - Get information from a movie. - """ - return [ - self.index, [CATEGORIES_DICT[c] for c in self.categories], - [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()] - ] - - def __str__(self): - return "" % ( - self.index, self.title, self.categories) - - def __repr__(self): - return self.__str__() - - -class UserInfo(object): - """ - User id, gender, age, and job information are stored in UserInfo. - """ - - def __init__(self, index, gender, age, job_id): - self.index = int(index) - self.is_male = gender == 'M' - self.age = age_table.index(int(age)) - self.job_id = int(job_id) - - def value(self): - """ - Get information from a user. - """ - return [self.index, 0 if self.is_male else 1, self.age, self.job_id] - - def __str__(self): - return "" % ( - self.index, "M" - if self.is_male else "F", age_table[self.age], self.job_id) - - def __repr__(self): - return str(self) - - -MOVIE_INFO = None -MOVIE_TITLE_DICT = None -CATEGORIES_DICT = None -USER_INFO = None - - -def __initialize_meta_info__(): - fn = paddle.v2.dataset.common.download(URL, "movielens", MD5) - global MOVIE_INFO - if MOVIE_INFO is None: - pattern = re.compile(r'^(.*)\((\d+)\)$') - with zipfile.ZipFile(file=fn) as package: - for info in package.infolist(): - assert isinstance(info, zipfile.ZipInfo) - MOVIE_INFO = dict() - title_word_set = set() - categories_set = set() - with package.open('ml-1m/movies.dat') as movie_file: - for i, line in enumerate(movie_file): - movie_id, title, categories = line.strip().split('::') - categories = categories.split('|') - for c in categories: - categories_set.add(c) - title = pattern.match(title).group(1) - MOVIE_INFO[int(movie_id)] = MovieInfo( - index=movie_id, categories=categories, title=title) - for w in title.split(): - title_word_set.add(w.lower()) - - global MOVIE_TITLE_DICT - MOVIE_TITLE_DICT = dict() - for i, w in enumerate(title_word_set): - MOVIE_TITLE_DICT[w] = i - - global CATEGORIES_DICT - CATEGORIES_DICT = dict() - for i, c in enumerate(categories_set): - CATEGORIES_DICT[c] = i - - global USER_INFO - USER_INFO = dict() - with package.open('ml-1m/users.dat') as user_file: - for line in user_file: - uid, gender, age, job, _ = line.strip().split("::") - USER_INFO[int(uid)] = UserInfo( - index=uid, gender=gender, age=age, job_id=job) - return fn - - -def __reader__(rand_seed=0, test_ratio=0.1, is_test=False): - fn = __initialize_meta_info__() - rand = random.Random(x=rand_seed) - with zipfile.ZipFile(file=fn) as package: - with package.open('ml-1m/ratings.dat') as rating: - for line in rating: - if (rand.random() < test_ratio) == is_test: - uid, mov_id, rating, _ = line.strip().split("::") - uid = int(uid) - mov_id = int(mov_id) - rating = float(rating) * 2 - 5.0 - - mov = MOVIE_INFO[mov_id] - usr = USER_INFO[uid] - yield usr.value() + mov.value() + [[rating]] - - -def __reader_creator__(**kwargs): - return lambda: __reader__(**kwargs) - - -train = functools.partial(__reader_creator__, is_test=False) -test = functools.partial(__reader_creator__, is_test=True) - - -def get_movie_title_dict(): - """ - Get movie title dictionary. - """ - __initialize_meta_info__() - return MOVIE_TITLE_DICT - - -def __max_index_info__(a, b): - if a.index > b.index: - return a - else: - return b - - -def max_movie_id(): - """ - Get the maximum value of movie id. 
- """ - __initialize_meta_info__() - return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index - - -def max_user_id(): - """ - Get the maximum value of user id. - """ - __initialize_meta_info__() - return reduce(__max_index_info__, USER_INFO.viewvalues()).index - - -def __max_job_id_impl__(a, b): - if a.job_id > b.job_id: - return a - else: - return b - - -def max_job_id(): - """ - Get the maximum value of job id. - """ - __initialize_meta_info__() - return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id - - -def movie_categories(): - """ - Get movie categoriges dictionary. - """ - __initialize_meta_info__() - return CATEGORIES_DICT - - -def user_info(): - """ - Get user info dictionary. - """ - __initialize_meta_info__() - return USER_INFO - - -def movie_info(): - """ - Get movie info dictionary. - """ - __initialize_meta_info__() - return MOVIE_INFO - - -def unittest(): - for train_count, _ in enumerate(train()()): - pass - for test_count, _ in enumerate(test()()): - pass - - print train_count, test_count - - -def fetch(): - paddle.v2.dataset.common.download(URL, "movielens", MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, train(), 1000, "movielens_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "movielens_test") - - -if __name__ == '__main__': - unittest() diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py deleted file mode 100644 index d3b3dd524c..0000000000 --- a/python/paddle/v2/dataset/mq2007.py +++ /dev/null @@ -1,333 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -MQ2007 dataset - -MQ2007 is a query set from Million Query track of TREC 2007. There are about 1700 queries in it with labeled documents. In MQ2007, the 5-fold cross -validation strategy is adopted and the 5-fold partitions are included in the package. In each fold, there are three subsets for learning: training set, -validation set and testing set. - -MQ2007 dataset from website -http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar and parse training set and test set into paddle reader creators - -""" - -import os -import functools -import rarfile -from common import download -import numpy as np - -# URL = "http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar" -URL = "http://www.bigdatalab.ac.cn/benchmark/upload/download_source/7b6dbbe2-842c-11e4-a536-bcaec51b9163_MQ2007.rar" -MD5 = "7be1640ae95c6408dab0ae7207bdc706" - - -def __initialize_meta_info__(): - """ - download and extract the MQ2007 dataset - """ - fn = fetch() - rar = rarfile.RarFile(fn) - dirpath = os.path.dirname(fn) - rar.extractall(path=dirpath) - return dirpath - - -class Query(object): - """ - queries used for learning to rank algorithms. 
It is created from relevance scores, query-document feature vectors - - Parameters: - ---------- - query_id : int - query_id in dataset, mapping from query to relevance documents - relevance_score : int - relevance score of query and document pair - feature_vector : array, dense feature - feature in vector format - description : string - comment section in query doc pair data - """ - - def __init__(self, - query_id=-1, - relevance_score=-1, - feature_vector=None, - description=""): - self.query_id = query_id - self.relevance_score = relevance_score - if feature_vector is None: - self.feature_vector = [] - else: - self.feature_vector = feature_vector - self.description = description - - def __str__(self): - string = "%s %s %s" % (str(self.relevance_score), str(self.query_id), - " ".join(str(f) for f in self.feature_vector)) - return string - - # @classmethod - def _parse_(self, text): - """ - parse line into Query - """ - comment_position = text.find('#') - line = text[:comment_position].strip() - self.description = text[comment_position + 1:].strip() - parts = line.split() - if len(parts) != 48: - sys.stdout.write("expect 48 space split parts, get %d" % - (len(parts))) - return None - # format : 0 qid:10 1:0.000272 2:0.000000 .... - self.relevance_score = int(parts[0]) - self.query_id = int(parts[1].split(':')[1]) - for p in parts[2:]: - pair = p.split(':') - self.feature_vector.append(float(pair[1])) - return self - - -class QueryList(object): - """ - group query into list, every item in list is a Query - """ - - def __init__(self, querylist=None): - self.query_id = -1 - if querylist is None: - self.querylist = [] - else: - self.querylist = querylist - for query in self.querylist: - if self.query_id == -1: - self.query_id = query.query_id - else: - if self.query_id != query.query_id: - raise ValueError("query in list must be same query_id") - - def __iter__(self): - for query in self.querylist: - yield query - - def __len__(self): - return len(self.querylist) - - def __getitem__(self, i): - return self.querylist[i] - - def _correct_ranking_(self): - if self.querylist is None: - return - self.querylist.sort(key=lambda x: x.relevance_score, reverse=True) - - def _add_query(self, query): - if self.query_id == -1: - self.query_id = query.query_id - else: - if self.query_id != query.query_id: - raise ValueError("query in list must be same query_id") - self.querylist.append(query) - - -def gen_plain_txt(querylist): - """ - gen plain text in list for other usage - Paramters: - -------- - querylist : querylist, one query match many docment pairs in list, see QueryList - - return : - ------ - query_id : np.array, shape=(samples_num, ) - label : np.array, shape=(samples_num, ) - querylist : np.array, shape=(samples_num, feature_dimension) - """ - if not isinstance(querylist, QueryList): - querylist = QueryList(querylist) - querylist._correct_ranking_() - for query in querylist: - yield querylist.query_id, query.relevance_score, np.array( - query.feature_vector) - - -def gen_point(querylist): - """ - gen item in list for point-wise learning to rank algorithm - Paramters: - -------- - querylist : querylist, one query match many docment pairs in list, see QueryList - - return : - ------ - label : np.array, shape=(samples_num, ) - querylist : np.array, shape=(samples_num, feature_dimension) - """ - if not isinstance(querylist, QueryList): - querylist = QueryList(querylist) - querylist._correct_ranking_() - for query in querylist: - yield query.relevance_score, np.array(query.feature_vector) - - -def 
gen_pair(querylist, partial_order="full"): - """ - gen pair for pair-wise learning to rank algorithm - Paramters: - -------- - querylist : querylist, one query match many docment pairs in list, see QueryList - pairtial_order : "full" or "neighbour" - there is redudant in all possiable pair combinations, which can be simplifed - gen pairs for neighbour items or the full partial order pairs - - return : - ------ - label : np.array, shape=(1) - query_left : np.array, shape=(1, feature_dimension) - query_right : same as left - """ - if not isinstance(querylist, QueryList): - querylist = QueryList(querylist) - querylist._correct_ranking_() - labels = [] - docpairs = [] - - # C(n,2) - for i in range(len(querylist)): - query_left = querylist[i] - for j in range(i + 1, len(querylist)): - query_right = querylist[j] - if query_left.relevance_score > query_right.relevance_score: - labels.append([1]) - docpairs.append([ - np.array(query_left.feature_vector), - np.array(query_right.feature_vector) - ]) - elif query_left.relevance_score < query_right.relevance_score: - labels.append([1]) - docpairs.append([ - np.array(query_right.feature_vector), - np.array(query_left.feature_vector) - ]) - for label, pair in zip(labels, docpairs): - yield np.array(label), pair[0], pair[1] - - -def gen_list(querylist): - """ - gen item in list for list-wise learning to rank algorithm - Paramters: - -------- - querylist : querylist, one query match many docment pairs in list, see QueryList - - return : - ------ - label : np.array, shape=(samples_num, ) - querylist : np.array, shape=(samples_num, feature_dimension) - """ - if not isinstance(querylist, QueryList): - querylist = QueryList(querylist) - querylist._correct_ranking_() - relevance_score_list = [[query.relevance_score] for query in querylist] - feature_vector_list = [query.feature_vector for query in querylist] - yield np.array(relevance_score_list), np.array(feature_vector_list) - - -def query_filter(querylists): - """ - filter query get only document with label 0. - label 0, 1, 2 means the relevance score document with query - parameters : - querylist : QueyList list - - return : - querylist : QueyList list - """ - filter_query = [] - for querylist in querylists: - relevance_score_list = [query.relevance_score for query in querylist] - if sum(relevance_score_list) != .0: - filter_query.append(querylist) - return filter_query - - -def load_from_text(filepath, shuffle=False, fill_missing=-1): - """ - parse data file into querys - """ - prev_query_id = -1 - querylists = [] - querylist = None - fn = __initialize_meta_info__() - with open(os.path.join(fn, filepath)) as f: - for line in f: - query = Query() - query = query._parse_(line) - if query == None: - continue - if query.query_id != prev_query_id: - if querylist is not None: - querylists.append(querylist) - querylist = QueryList() - prev_query_id = query.query_id - querylist._add_query(query) - if querylist is not None: - querylists.append(querylist) - return querylists - - -def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1): - """ - Parameters - -------- - filename : string - fill_missing : fill the missing value. 
default in MQ2007 is -1 - - Returns - ------ - yield - label query_left, query_right # format = "pairwise" - label querylist # format = "listwise" - """ - querylists = query_filter( - load_from_text( - filepath, shuffle=shuffle, fill_missing=fill_missing)) - for querylist in querylists: - if format == "plain_txt": - yield next(gen_plain_txt(querylist)) - elif format == "pointwise": - yield next(gen_point(querylist)) - elif format == "pairwise": - for pair in gen_pair(querylist): - yield pair - elif format == "listwise": - yield next(gen_list(querylist)) - - -train = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/train.txt") -test = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/test.txt") - - -def fetch(): - return download(URL, "MQ2007", MD5) - - -if __name__ == "__main__": - fetch() - mytest = functools.partial( - __reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise") - for label, query in mytest(): - print label, query diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py deleted file mode 100644 index b0b9757c1a..0000000000 --- a/python/paddle/v2/dataset/sentiment.py +++ /dev/null @@ -1,141 +0,0 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -The script fetch and preprocess movie_reviews data set that provided by NLTK - -TODO(yuyang18): Complete dataset. -""" - -import collections -from itertools import chain - -import nltk -from nltk.corpus import movie_reviews - -import paddle.v2.dataset.common - -__all__ = ['train', 'test', 'get_word_dict', 'convert'] -NUM_TRAINING_INSTANCES = 1600 -NUM_TOTAL_INSTANCES = 2000 - - -def download_data_if_not_yet(): - """ - Download the data set, if the data set is not download. - """ - try: - # make sure that nltk can find the data - if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME) - movie_reviews.categories() - except LookupError: - print "Downloading movie_reviews data set, please wait....." - nltk.download( - 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) - print "Download data set success....." 
- print "Path is " + nltk.data.find('corpora/movie_reviews').path - - -def get_word_dict(): - """ - Sorted the words by the frequency of words which occur in sample - :return: - words_freq_sorted - """ - words_freq_sorted = list() - word_freq_dict = collections.defaultdict(int) - download_data_if_not_yet() - - for category in movie_reviews.categories(): - for field in movie_reviews.fileids(category): - for words in movie_reviews.words(field): - word_freq_dict[words] += 1 - words_sort_list = word_freq_dict.items() - words_sort_list.sort(cmp=lambda a, b: b[1] - a[1]) - for index, word in enumerate(words_sort_list): - words_freq_sorted.append((word[0], index)) - return words_freq_sorted - - -def sort_files(): - """ - Sorted the sample for cross reading the sample - :return: - files_list - """ - files_list = list() - neg_file_list = movie_reviews.fileids('neg') - pos_file_list = movie_reviews.fileids('pos') - files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list))) - return files_list - - -def load_sentiment_data(): - """ - Load the data set - :return: - data_set - """ - data_set = list() - download_data_if_not_yet() - words_ids = dict(get_word_dict()) - for sample_file in sort_files(): - words_list = list() - category = 0 if 'neg' in sample_file else 1 - for word in movie_reviews.words(sample_file): - words_list.append(words_ids[word.lower()]) - data_set.append((words_list, category)) - return data_set - - -def reader_creator(data): - """ - Reader creator, generate an iterator for data set - :param data: - train data set or test data set - """ - for each in data: - yield each[0], each[1] - - -def train(): - """ - Default training set reader creator - """ - data_set = load_sentiment_data() - return reader_creator(data_set[0:NUM_TRAINING_INSTANCES]) - - -def test(): - """ - Default test set reader creator - """ - data_set = load_sentiment_data() - return reader_creator(data_set[NUM_TRAINING_INSTANCES:]) - - -def fetch(): - nltk.download( - 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, train, 1000, "sentiment_train") - paddle.v2.dataset.common.convert(path, test, 1000, "sentiment_test") diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/v2/dataset/tests/cifar_test.py deleted file mode 100644 index e0e18229da..0000000000 --- a/python/paddle/v2/dataset/tests/cifar_test.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddle.v2.dataset.cifar -import unittest - - -class TestCIFAR(unittest.TestCase): - def check_reader(self, reader): - sum = 0 - label = 0 - for l in reader(): - self.assertEqual(l[0].size, 3072) - if l[1] > label: - label = l[1] - sum += 1 - return sum, label - - def test_test10(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test10()) - self.assertEqual(instances, 10000) - self.assertEqual(max_label_value, 9) - - def test_train10(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train10()) - self.assertEqual(instances, 50000) - self.assertEqual(max_label_value, 9) - - def test_test100(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test100()) - self.assertEqual(instances, 10000) - self.assertEqual(max_label_value, 99) - - def test_train100(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train100()) - self.assertEqual(instances, 50000) - self.assertEqual(max_label_value, 99) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py deleted file mode 100644 index cfa194eba3..0000000000 --- a/python/paddle/v2/dataset/tests/common_test.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
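# The md5file() helper exercised by the next test can be approximated with
# hashlib; this is a sketch under that assumption, not the exact
# implementation from paddle.v2.dataset.common:
import hashlib


def md5file_sketch(path, chunk_size=8192):
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(chunk_size), b''):
            md5.update(block)
    return md5.hexdigest()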
-
-import paddle.v2.dataset.common
-import recordio
-import unittest
-import tempfile
-import glob
-
-
-class TestCommon(unittest.TestCase):
-    def test_md5file(self):
-        _, temp_path = tempfile.mkstemp()
-        with open(temp_path, 'w') as f:
-            f.write("Hello\n")
-        self.assertEqual('09f7e02f1290be211da707a266f153b3',
-                         paddle.v2.dataset.common.md5file(temp_path))
-
-    def test_download(self):
-        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
-        self.assertEqual(
-            paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
-            paddle.v2.dataset.common.download(
-                yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))
-
-    def test_split(self):
-        def test_reader():
-            def reader():
-                for x in xrange(10):
-                    yield x
-
-            return reader
-
-        # the shard files are written under a directory, so create one
-        temp_path = tempfile.mkdtemp()
-        paddle.v2.dataset.common.split(
-            test_reader(), 4, suffix=temp_path + '/test-%05d.pickle')
-        files = glob.glob(temp_path + '/test-*.pickle')
-        self.assertEqual(len(files), 3)
-
-    def test_cluster_file_reader(self):
-        temp_path = tempfile.mkdtemp()
-        for x in xrange(5):
-            with open(temp_path + '/%05d.test' % x, 'w') as f:
-                f.write('%d\n' % x)
-        reader = paddle.v2.dataset.common.cluster_files_reader(
-            temp_path + '/*.test', 5, 0)
-        for idx, e in enumerate(reader()):
-            self.assertEqual(e, str("0"))
-
-    def test_convert(self):
-        record_num = 10
-        num_shards = 4
-
-        def test_reader():
-            def reader():
-                for x in xrange(record_num):
-                    yield x
-
-            return reader
-
-        path = tempfile.mkdtemp()
-        paddle.v2.dataset.common.convert(path,
-                                         test_reader(), num_shards,
-                                         'random_images')
-
-        files = glob.glob(path + '/random_images-*')
-        self.assertEqual(len(files), num_shards)
-
-        recs = []
-        for i in range(0, num_shards):
-            n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
-            r = recordio.reader(n)
-            while True:
-                d = r.read()
-                if d is None:
-                    break
-                recs.append(d)
-
-        recs.sort()
-        self.assertEqual(len(recs), record_num)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/flowers_test.py b/python/paddle/v2/dataset/tests/flowers_test.py
deleted file mode 100644
index a8ae9a07ac..0000000000
--- a/python/paddle/v2/dataset/tests/flowers_test.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
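# Shard arithmetic behind the test_split case above: splitting 10 samples
# into chunks of 4 yields ceil(10 / 4) == 3 pickle files. A pure-Python
# sketch of that count:
def shard_count(total, per_shard):
    return (total + per_shard - 1) // per_shard

assert shard_count(10, 4) == 3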
- -import paddle.v2.dataset.flowers -import unittest - - -class TestFlowers(unittest.TestCase): - def check_reader(self, reader): - sum = 0 - label = 0 - size = 224 * 224 * 3 - for l in reader(): - self.assertEqual(l[0].size, size) - if l[1] > label: - label = l[1] - sum += 1 - return sum, label - - def test_train(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.train()) - self.assertEqual(instances, 6149) - self.assertEqual(max_label_value, 102) - - def test_test(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.test()) - self.assertEqual(instances, 1020) - self.assertEqual(max_label_value, 102) - - def test_valid(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.valid()) - self.assertEqual(instances, 1020) - self.assertEqual(max_label_value, 102) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/v2/dataset/tests/imdb_test.py deleted file mode 100644 index c4d82f2689..0000000000 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.v2.dataset.imdb -import unittest -import re - -TRAIN_POS_PATTERN = re.compile("aclImdb/train/pos/.*\.txt$") -TRAIN_NEG_PATTERN = re.compile("aclImdb/train/neg/.*\.txt$") -TRAIN_PATTERN = re.compile("aclImdb/train/.*\.txt$") - -TEST_POS_PATTERN = re.compile("aclImdb/test/pos/.*\.txt$") -TEST_NEG_PATTERN = re.compile("aclImdb/test/neg/.*\.txt$") -TEST_PATTERN = re.compile("aclImdb/test/.*\.txt$") - - -class TestIMDB(unittest.TestCase): - word_idx = None - - def test_build_dict(self): - if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) - - self.assertEqual(len(self.word_idx), 7036) - - def check_dataset(self, dataset, expected_size): - if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) - - sum = 0 - for l in dataset(self.word_idx): - self.assertEqual(l[1], sum % 2) - sum += 1 - self.assertEqual(sum, expected_size) - - def test_train(self): - self.check_dataset(paddle.v2.dataset.imdb.train, 25000) - - def test_test(self): - self.check_dataset(paddle.v2.dataset.imdb.test, 25000) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/v2/dataset/tests/imikolov_test.py deleted file mode 100644 index 714a75d6f1..0000000000 --- a/python/paddle/v2/dataset/tests/imikolov_test.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.v2.dataset.imikolov
-import unittest
-
-WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
-
-
-class TestMikolov(unittest.TestCase):
-    def check_reader(self, reader, n):
-        for l in reader():
-            self.assertEqual(len(l), n)
-
-    def test_train(self):
-        n = 5
-        self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
-
-        first_line = 'aer banknote berlitz calloway centrust cluett fromstein '\
-            'gitano guterman hydro-quebec ipo kia memotec mlx nahb punts '\
-            'rake regatta rubens sim snack-food ssangyong swapo wachter'
-        first_line = [
-            WORD_DICT.get(ch, WORD_DICT['<unk>'])
-            for ch in first_line.split(' ')
-        ]
-        for l in paddle.v2.dataset.imikolov.train(
-                WORD_DICT, n=-1,
-                data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
-            read_line = l[0][1:]
-            break
-        self.assertEqual(first_line, read_line)
-
-    def test_test(self):
-        n = 5
-        self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
-
-        first_line = 'consumers may want to move their telephones a little '\
-            'closer to the tv set'
-        first_line = [
-            WORD_DICT.get(ch, WORD_DICT['<unk>'])
-            for ch in first_line.split(' ')
-        ]
-        for l in paddle.v2.dataset.imikolov.test(
-                WORD_DICT, n=-1,
-                data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
-            read_line = l[0][1:]
-            break
-        self.assertEqual(first_line, read_line)
-
-    def test_total(self):
-        _, idx = zip(*WORD_DICT.items())
-        self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/v2/dataset/tests/mnist_test.py
deleted file mode 100644
index 1d344cac3e..0000000000
--- a/python/paddle/v2/dataset/tests/mnist_test.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
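# How the NGRAM branch of the imikolov reader (tested above) slides its
# window over a sentence of word IDs; a standalone sketch of that loop:
def ngrams(ids, n):
    return [tuple(ids[i - n:i]) for i in range(n, len(ids) + 1)]

# ngrams([0, 7, 8, 9, 1], 3) -> [(0, 7, 8), (7, 8, 9), (8, 9, 1)]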
- -import paddle.v2.dataset.mnist -import unittest - - -class TestMNIST(unittest.TestCase): - def check_reader(self, reader): - sum = 0 - label = 0 - for l in reader(): - self.assertEqual(l[0].size, 784) - if l[1] > label: - label = l[1] - sum += 1 - return sum, label - - def test_train(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.mnist.train()) - self.assertEqual(instances, 60000) - self.assertEqual(max_label_value, 9) - - def test_test(self): - instances, max_label_value = self.check_reader( - paddle.v2.dataset.mnist.test()) - self.assertEqual(instances, 10000) - self.assertEqual(max_label_value, 9) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/dataset/tests/mq2007_test.py b/python/paddle/v2/dataset/tests/mq2007_test.py deleted file mode 100644 index 59847b6c18..0000000000 --- a/python/paddle/v2/dataset/tests/mq2007_test.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.v2.dataset.mq2007 -import unittest - - -class TestMQ2007(unittest.TestCase): - def test_pairwise(self): - for label, query_left, query_right in paddle.v2.dataset.mq2007.test( - format="pairwise"): - self.assertEqual(query_left.shape(), (46, )) - self.assertEqual(query_right.shape(), (46, )) - - def test_listwise(self): - for label_array, query_array in paddle.v2.dataset.mq2007.test( - format="listwise"): - self.assertEqual(len(label_array), len(query_array)) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/v2/dataset/tests/test_sentiment.py deleted file mode 100644 index 4074052907..0000000000 --- a/python/paddle/v2/dataset/tests/test_sentiment.py +++ /dev/null @@ -1,55 +0,0 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
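# The pairwise format checked by the MQ2007 test above pairs every two
# documents with different relevance and always puts the higher-scored one
# first, labelling the pair 1. A minimal sketch of that generator:
def pairwise(scored_docs):
    # scored_docs: list of (relevance_score, feature_vector) tuples
    for i, (score_a, doc_a) in enumerate(scored_docs):
        for score_b, doc_b in scored_docs[i + 1:]:
            if score_a > score_b:
                yield 1, doc_a, doc_b
            elif score_a < score_b:
                yield 1, doc_b, doc_a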
-
-import unittest
-import nltk
-import paddle.v2.dataset.sentiment as st
-from nltk.corpus import movie_reviews
-
-
-class TestSentimentMethods(unittest.TestCase):
-    def test_get_word_dict(self):
-        word_dict = st.get_word_dict()[0:10]
-        test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3),
-                          (u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7),
-                          (u'is', 8), (u'in', 9)]
-        for idx, each in enumerate(word_dict):
-            self.assertEqual(each, test_word_list[idx])
-        self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
-
-    def test_sort_files(self):
-        last_label = ''
-        for sample_file in st.sort_files():
-            current_label = sample_file.split("/")[0]
-            self.assertNotEqual(current_label, last_label)
-            last_label = current_label
-
-    def test_data_set(self):
-        data_set = st.load_sentiment_data()
-        last_label = -1
-        for each in st.test():
-            self.assertNotEqual(each[1], last_label)
-            last_label = each[1]
-        self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
-        self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
-        self.assertEqual(
-            len(list(st.test())),
-            (st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/voc2012_test.py b/python/paddle/v2/dataset/tests/voc2012_test.py
deleted file mode 100644
index 31e72ebf5e..0000000000
--- a/python/paddle/v2/dataset/tests/voc2012_test.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.v2.dataset.voc2012
-import unittest
-
-
-class TestVOC(unittest.TestCase):
-    def check_reader(self, reader):
-        sum = 0
-        label = 0
-        for l in reader():
-            self.assertEqual(l[0].size, 3 * l[1].size)
-            sum += 1
-        return sum
-
-    def test_train(self):
-        count = self.check_reader(paddle.v2.dataset.voc2012.train())
-        self.assertEqual(count, 2913)
-
-    def test_test(self):
-        count = self.check_reader(paddle.v2.dataset.voc2012.test())
-        self.assertEqual(count, 1464)
-
-    def test_val(self):
-        count = self.check_reader(paddle.v2.dataset.voc2012.val())
-        self.assertEqual(count, 1449)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/wmt16_test.py b/python/paddle/v2/dataset/tests/wmt16_test.py
deleted file mode 100644
index cef6c3216e..0000000000
--- a/python/paddle/v2/dataset/tests/wmt16_test.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
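The TestVOC reader check above relies on a layout invariant: an RGB image in HWC order carries exactly three times as many values as a single-channel segmentation mask of the same height and width. Dummy arrays make the arithmetic explicit (the shapes are arbitrary):

.. code-block:: python

    import numpy as np

    image = np.zeros((4, 6, 3), dtype=np.uint8)  # H x W x C color image
    label = np.zeros((4, 6), dtype=np.uint8)     # per-pixel class indices
    assert image.size == 3 * label.size          # the l[0].size == 3 * l[1].size check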
-# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.v2.dataset.wmt16 -import unittest - - -class TestWMT16(unittest.TestCase): - def checkout_one_sample(self, sample): - # train data has 3 field: source language word indices, - # target language word indices, and target next word indices. - self.assertEqual(len(sample), 3) - - # test start mark and end mark in source word indices. - self.assertEqual(sample[0][0], 0) - self.assertEqual(sample[0][-1], 1) - - # test start mask in target word indices - self.assertEqual(sample[1][0], 0) - - # test en mask in target next word indices - self.assertEqual(sample[2][-1], 1) - - def test_train(self): - for idx, sample in enumerate( - paddle.v2.dataset.wmt16.train( - src_dict_size=100000, trg_dict_size=100000)()): - if idx >= 10: break - self.checkout_one_sample(sample) - - def test_test(self): - for idx, sample in enumerate( - paddle.v2.dataset.wmt16.test( - src_dict_size=1000, trg_dict_size=1000)()): - if idx >= 10: break - self.checkout_one_sample(sample) - - def test_val(self): - for idx, sample in enumerate( - paddle.v2.dataset.wmt16.validation( - src_dict_size=1000, trg_dict_size=1000)()): - if idx >= 10: break - self.checkout_one_sample(sample) - - def test_get_dict(self): - dict_size = 1000 - word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True) - self.assertEqual(len(word_dict), dict_size) - self.assertEqual(word_dict[0], "") - self.assertEqual(word_dict[1], "") - self.assertEqual(word_dict[2], "") - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py deleted file mode 100644 index f10bf7e42a..0000000000 --- a/python/paddle/v2/dataset/uci_housing.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -UCI Housing dataset. - -This module will download dataset from -https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and -parse training set and test set into paddle reader creators. 
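checkout_one_sample above pins down the framing convention the WMT16 readers follow. Assuming index 0 is the start mark and index 1 the end mark (the angle-bracket token literals, conventionally <s> and <e>, appear stripped in this copy of the source), a hand-built sample that satisfies all four assertions looks like this:

.. code-block:: python

    src_ids = [0, 57, 89, 1]        # <s> w1 w2 <e>
    trg_ids = [0, 12, 34]           # <s> w1 w2
    trg_ids_next = [12, 34, 1]      # w1 w2 <e>
    sample = (src_ids, trg_ids, trg_ids_next)

    assert len(sample) == 3
    assert sample[0][0] == 0 and sample[0][-1] == 1  # source framed by marks
    assert sample[1][0] == 0                         # target starts with <s>
    assert sample[2][-1] == 1                        # next-word ends with <e>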
-""" - -import numpy as np -import os -import paddle.v2.dataset.common -from paddle.v2.parameters import Parameters - -__all__ = ['train', 'test'] - -URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data' -MD5 = 'd4accdce7a25600298819f8e28e8d593' -feature_names = [ - 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', - 'PTRATIO', 'B', 'LSTAT', 'convert' -] - -UCI_TRAIN_DATA = None -UCI_TEST_DATA = None -URL_MODEL = 'https://github.com/PaddlePaddle/book/raw/develop/01.fit_a_line/fit_a_line.tar' -MD5_MODEL = '52fc3da8ef3937822fcdd87ee05c0c9b' - - -def feature_range(maximums, minimums): - import matplotlib - matplotlib.use('Agg') - import matplotlib.pyplot as plt - fig, ax = plt.subplots() - feature_num = len(maximums) - ax.bar(range(feature_num), maximums - minimums, color='r', align='center') - ax.set_title('feature scale') - plt.xticks(range(feature_num), feature_names) - plt.xlim([-1, feature_num]) - fig.set_figheight(6) - fig.set_figwidth(10) - if not os.path.exists('./image'): - os.makedirs('./image') - fig.savefig('image/ranges.png', dpi=48) - plt.close(fig) - - -def load_data(filename, feature_num=14, ratio=0.8): - global UCI_TRAIN_DATA, UCI_TEST_DATA - if UCI_TRAIN_DATA is not None and UCI_TEST_DATA is not None: - return - - data = np.fromfile(filename, sep=' ') - data = data.reshape(data.shape[0] / feature_num, feature_num) - maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( - axis=0) / data.shape[0] - feature_range(maximums[:-1], minimums[:-1]) - for i in xrange(feature_num - 1): - data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) - offset = int(data.shape[0] * ratio) - UCI_TRAIN_DATA = data[:offset] - UCI_TEST_DATA = data[offset:] - - -def train(): - """ - UCI_HOUSING training set creator. - - It returns a reader creator, each sample in the reader is features after - normalization and price number. - - :return: Training reader creator - :rtype: callable - """ - global UCI_TRAIN_DATA - load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) - - def reader(): - for d in UCI_TRAIN_DATA: - yield d[:-1], d[-1:] - - return reader - - -def test(): - """ - UCI_HOUSING test set creator. - - It returns a reader creator, each sample in the reader is features after - normalization and price number. - - :return: Test reader creator - :rtype: callable - """ - global UCI_TEST_DATA - load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) - - def reader(): - for d in UCI_TEST_DATA: - yield d[:-1], d[-1:] - - return reader - - -def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'fit_a_line.tar', - MD5_MODEL) - with open(tar_file, 'r') as f: - parameters = Parameters.from_tar(f) - return parameters - - -def fetch(): - paddle.v2.dataset.common.download(URL, 'uci_housing', MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.v2.dataset.common.convert(path, train(), 1000, "uci_housing_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "uci_houseing_test") diff --git a/python/paddle/v2/dataset/voc2012.py b/python/paddle/v2/dataset/voc2012.py deleted file mode 100644 index 617e212d67..0000000000 --- a/python/paddle/v2/dataset/voc2012.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Image dataset for segmentation. -The 2012 dataset contains images from 2008-2011 for which additional -segmentations have been prepared. As in previous years the assignment -to training/test sets has been maintained. The total number of images -with segmentation has been increased from 7,062 to 9,993. -""" - -import tarfile -import io -import numpy as np -from paddle.v2.dataset.common import download -from paddle.v2.image import * -from PIL import Image - -__all__ = ['train', 'test', 'val'] - -VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\ -VOCtrainval_11-May-2012.tar' - -VOC_MD5 = '6cd6e144f989b92b3379bac3b3de84fd' -SET_FILE = 'VOCdevkit/VOC2012/ImageSets/Segmentation/{}.txt' -DATA_FILE = 'VOCdevkit/VOC2012/JPEGImages/{}.jpg' -LABEL_FILE = 'VOCdevkit/VOC2012/SegmentationClass/{}.png' - -CACHE_DIR = 'voc2012' - - -def reader_creator(filename, sub_name): - - tarobject = tarfile.open(filename) - name2mem = {} - for ele in tarobject.getmembers(): - name2mem[ele.name] = ele - - def reader(): - set_file = SET_FILE.format(sub_name) - sets = tarobject.extractfile(name2mem[set_file]) - for line in sets: - line = line.strip() - data_file = DATA_FILE.format(line) - label_file = LABEL_FILE.format(line) - data = tarobject.extractfile(name2mem[data_file]).read() - label = tarobject.extractfile(name2mem[label_file]).read() - data = Image.open(io.BytesIO(data)) - label = Image.open(io.BytesIO(label)) - data = np.array(data) - label = np.array(label) - yield data, label - - return reader - - -def train(): - """ - Create a train dataset reader containing 2913 images in HWC order. - """ - return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'trainval') - - -def test(): - """ - Create a test dataset reader containing 1464 images in HWC order. - """ - return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'train') - - -def val(): - """ - Create a val dataset reader containing 1449 images in HWC order. - """ - return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'val') diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py deleted file mode 100644 index b9e602f324..0000000000 --- a/python/paddle/v2/dataset/wmt14.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -WMT14 dataset. -The original WMT14 dataset is too large and a small set of data for set is -provided. This module will download dataset from -http://paddlemodels.bj.bcebos.com/wmt/wmt14.tgz and -parse training set and test set into paddle reader creators. 
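reader_creator above streams VOC images straight out of the tarball instead of extracting it: tar members are indexed by name once, then each listed image/label pair is read on demand. A stripped-down sketch of that pattern; the archive path and name pairs are hypothetical:

.. code-block:: python

    import tarfile

    def tar_pair_reader(tar_path, names):
        tar = tarfile.open(tar_path)
        # Index members by name once so the lookups below are O(1).
        by_name = {member.name: member for member in tar.getmembers()}

        def reader():
            for data_name, label_name in names:
                data = tar.extractfile(by_name[data_name]).read()
                label = tar.extractfile(by_name[label_name]).read()
                yield data, label

        return reader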
- -""" -import tarfile -import gzip - -import paddle.v2.dataset.common -from paddle.v2.parameters import Parameters - -__all__ = [ - 'train', - 'test', - 'get_dict', - 'convert', -] - -URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/' - 'cslm_joint_paper/data/dev+test.tgz') -MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' -# this is a small set of data for test. The original data is too large and -# will be add later. -URL_TRAIN = ('http://paddlemodels.bj.bcebos.com/wmt/wmt14.tgz') -MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c' -# BLEU of this trained model is 26.92 -URL_MODEL = 'http://paddlemodels.bj.bcebos.com/wmt%2Fwmt14.tgz' -MD5_MODEL = '0cb4a5366189b6acba876491c8724fa3' - -START = "" -END = "" -UNK = "" -UNK_IDX = 2 - - -def __read_to_dict(tar_file, dict_size): - def __to_dict(fd, size): - out_dict = dict() - for line_count, line in enumerate(fd): - if line_count < size: - out_dict[line.strip()] = line_count - else: - break - return out_dict - - with tarfile.open(tar_file, mode='r') as f: - names = [ - each_item.name for each_item in f - if each_item.name.endswith("src.dict") - ] - assert len(names) == 1 - src_dict = __to_dict(f.extractfile(names[0]), dict_size) - names = [ - each_item.name for each_item in f - if each_item.name.endswith("trg.dict") - ] - assert len(names) == 1 - trg_dict = __to_dict(f.extractfile(names[0]), dict_size) - return src_dict, trg_dict - - -def reader_creator(tar_file, file_name, dict_size): - def reader(): - src_dict, trg_dict = __read_to_dict(tar_file, dict_size) - with tarfile.open(tar_file, mode='r') as f: - names = [ - each_item.name for each_item in f - if each_item.name.endswith(file_name) - ] - for name in names: - for line in f.extractfile(name): - line_split = line.strip().split('\t') - if len(line_split) != 2: - continue - src_seq = line_split[0] # one source sequence - src_words = src_seq.split() - src_ids = [ - src_dict.get(w, UNK_IDX) - for w in [START] + src_words + [END] - ] - - trg_seq = line_split[1] # one target sequence - trg_words = trg_seq.split() - trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words] - - # remove sequence whose length > 80 in training mode - if len(src_ids) > 80 or len(trg_ids) > 80: - continue - trg_ids_next = trg_ids + [trg_dict[END]] - trg_ids = [trg_dict[START]] + trg_ids - - yield src_ids, trg_ids, trg_ids_next - - return reader - - -def train(dict_size): - """ - WMT14 training set creator. - - It returns a reader creator, each sample in the reader is source language - word ID sequence, target language word ID sequence and next word ID - sequence. - - :return: Training reader creator - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'train/train', dict_size) - - -def test(dict_size): - """ - WMT14 test set creator. - - It returns a reader creator, each sample in the reader is source language - word ID sequence, target language word ID sequence and next word ID - sequence. 
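The hunk above converts each tab-separated sentence pair into index sequences, mapping out-of-vocabulary words to UNK_IDX and framing the source with start and end marks. The conversion is isolated below with a made-up dictionary; the token literals are assumed to be the conventional <s>/<e>/<unk>, since they appear blank in this copy of the source:

.. code-block:: python

    UNK_IDX = 2
    src_dict = {'<s>': 0, '<e>': 1, '<unk>': 2, 'hello': 3, 'world': 4}

    words = 'hello brave world'.split()
    # Frame with start/end marks; fall back to <unk> for missing words.
    src_ids = [src_dict.get(w, UNK_IDX) for w in ['<s>'] + words + ['<e>']]
    assert src_ids == [0, 3, 2, 4, 1]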
- - :return: Test reader creator - :rtype: callable - """ - return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'test/test', dict_size) - - -def gen(dict_size): - return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), - 'gen/gen', dict_size) - - -def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) - with gzip.open(tar_file, 'r') as f: - parameters = Parameters.from_tar(f) - return parameters - - -def get_dict(dict_size, reverse=True): - # if reverse = False, return dict = {'a':'001', 'b':'002', ...} - # else reverse = true, return dict = {'001':'a', '002':'b', ...} - tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - src_dict, trg_dict = __read_to_dict(tar_file, dict_size) - if reverse: - src_dict = {v: k for k, v in src_dict.items()} - trg_dict = {v: k for k, v in trg_dict.items()} - return src_dict, trg_dict - - -def fetch(): - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) - - -def convert(path): - """ - Converts dataset to recordio format - """ - dict_size = 30000 - paddle.v2.dataset.common.convert(path, - train(dict_size), 1000, "wmt14_train") - paddle.v2.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test") diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/v2/dataset/wmt16.py deleted file mode 100644 index 5793002091..0000000000 --- a/python/paddle/v2/dataset/wmt16.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -ACL2016 Multimodal Machine Translation. Please see this website for more -details: http://www.statmt.org/wmt16/multimodal-task.html#task1 - -If you use the dataset created for your task, please cite the following paper: -Multi30K: Multilingual English-German Image Descriptions. - -@article{elliott-EtAl:2016:VL16, - author = {{Elliott}, D. and {Frank}, S. and {Sima"an}, K. 
and {Specia}, L.}, - title = {Multi30K: Multilingual English-German Image Descriptions}, - booktitle = {Proceedings of the 6th Workshop on Vision and Language}, - year = {2016}, - pages = {70--74}, - year = 2016 -} -""" - -import os -import tarfile -import gzip -from collections import defaultdict - -import paddle.v2.dataset.common - -__all__ = [ - "train", - "test", - "validation", - "convert", - "fetch", - "get_dict", -] - -DATA_URL = ("http://cloud.dlnel.org/filepub/" - "?uuid=46a0808e-ddd8-427c-bacd-0dbc6d045fed") -DATA_MD5 = "0c38be43600334966403524a40dcd81e" - -TOTAL_EN_WORDS = 11250 -TOTAL_DE_WORDS = 19220 - -START_MARK = "" -END_MARK = "" -UNK_MARK = "" - - -def __build_dict(tar_file, dict_size, save_path, lang): - word_dict = defaultdict(int) - with tarfile.open(tar_file, mode="r") as f: - for line in f.extractfile("wmt16/train"): - line_split = line.strip().split("\t") - if len(line_split) != 2: continue - sen = line_split[0] if lang == "en" else line_split[1] - for w in sen.split(): - word_dict[w] += 1 - - with open(save_path, "w") as fout: - fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK)) - for idx, word in enumerate( - sorted( - word_dict.iteritems(), key=lambda x: x[1], reverse=True)): - if idx + 3 == dict_size: break - fout.write(word[0].encode('utf-8')) - fout.write('\n') - - -def __load_dict(tar_file, dict_size, lang, reverse=False): - dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, - "wmt16/%s_%d.dict" % (lang, dict_size)) - if not os.path.exists(dict_path) or ( - len(open(dict_path, "r").readlines()) != dict_size): - __build_dict(tar_file, dict_size, dict_path, lang) - - word_dict = {} - with open(dict_path, "r") as fdict: - for idx, line in enumerate(fdict): - if reverse: - word_dict[idx] = line.strip() - else: - word_dict[line.strip()] = idx - return word_dict - - -def __get_dict_size(src_dict_size, trg_dict_size, src_lang): - src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else - TOTAL_DE_WORDS)) - trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else - TOTAL_ENG_WORDS)) - return src_dict_size, trg_dict_size - - -def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): - def reader(): - src_dict = __load_dict(tar_file, src_dict_size, src_lang) - trg_dict = __load_dict(tar_file, trg_dict_size, - ("de" if src_lang == "en" else "en")) - - # the indice for start mark, end mark, and unk are the same in source - # language and target language. Here uses the source language - # dictionary to determine their indices. - start_id = src_dict[START_MARK] - end_id = src_dict[END_MARK] - unk_id = src_dict[UNK_MARK] - - src_col = 0 if src_lang == "en" else 1 - trg_col = 1 - src_col - - with tarfile.open(tar_file, mode="r") as f: - for line in f.extractfile(file_name): - line_split = line.strip().split("\t") - if len(line_split) != 2: - continue - src_words = line_split[src_col].split() - src_ids = [start_id] + [ - src_dict.get(w, unk_id) for w in src_words - ] + [end_id] - - trg_words = line_split[trg_col].split() - trg_ids = [trg_dict.get(w, unk_id) for w in trg_words] - - trg_ids_next = trg_ids + [end_id] - trg_ids = [start_id] + trg_ids - - yield src_ids, trg_ids, trg_ids_next - - return reader - - -def train(src_dict_size, trg_dict_size, src_lang="en"): - """ - WMT16 train set reader. - - This function returns the reader for train data. 
Each sample the reader - returns is made up of three fields: the source language word index sequence, - target language word index sequence and next word index sequence. - - - NOTE: - The original like for training data is: - http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz - - paddle.dataset.wmt16 provides a tokenized version of the original dataset by - using moses's tokenization script: - https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl - - Args: - src_dict_size(int): Size of the source language dictionary. Three - special tokens will be added into the dictionary: - for start mark, for end mark, and for - unknown word. - trg_dict_size(int): Size of the target language dictionary. Three - special tokens will be added into the dictionary: - for start mark, for end mark, and for - unknown word. - src_lang(string): A string indicating which language is the source - language. Available options are: "en" for English - and "de" for Germany. - - Returns: - callable: The train reader. - """ - - if src_lang not in ["en", "de"]: - raise ValueError("An error language type. Only support: " - "en (for English); de(for Germany).") - src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, - src_lang) - - return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), - file_name="wmt16/train", - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang) - - -def test(src_dict_size, trg_dict_size, src_lang="en"): - """ - WMT16 test set reader. - - This function returns the reader for test data. Each sample the reader - returns is made up of three fields: the source language word index sequence, - target language word index sequence and next word index sequence. - - NOTE: - The original like for test data is: - http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/mmt16_task1_test.tar.gz - - paddle.dataset.wmt16 provides a tokenized version of the original dataset by - using moses's tokenization script: - https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl - - Args: - src_dict_size(int): Size of the source language dictionary. Three - special tokens will be added into the dictionary: - for start mark, for end mark, and for - unknown word. - trg_dict_size(int): Size of the target language dictionary. Three - special tokens will be added into the dictionary: - for start mark, for end mark, and for - unknown word. - src_lang(string): A string indicating which language is the source - language. Available options are: "en" for English - and "de" for Germany. - - Returns: - callable: The test reader. - """ - - if src_lang not in ["en", "de"]: - raise ValueError("An error language type. " - "Only support: en (for English); de(for Germany).") - - src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, - src_lang) - - return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), - file_name="wmt16/test", - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang) - - -def validation(src_dict_size, trg_dict_size, src_lang="en"): - """ - WMT16 validation set reader. - - This function returns the reader for validation data. Each sample the reader - returns is made up of three fields: the source language word index sequence, - target language word index sequence and next word index sequence. 
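A typical call site for the readers documented above caps both dictionaries and pulls a few framed samples. This is a sketch only, and it triggers a download of the Multi30K archive on first use:

.. code-block:: python

    import paddle.v2.dataset.wmt16 as wmt16

    reader = wmt16.train(src_dict_size=3000, trg_dict_size=3000, src_lang="en")
    for i, (src_ids, trg_ids, trg_ids_next) in enumerate(reader()):
        if i >= 2:
            break
        # Every source sequence is framed by the start/end mark indices.
        print("starts at %d, ends at %d" % (src_ids[0], src_ids[-1]))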
- - NOTE: - The original like for validation data is: - http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz - - paddle.dataset.wmt16 provides a tokenized version of the original dataset by - using moses's tokenization script: - https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl - - Args: - src_dict_size(int): Size of the source language dictionary. Three - special tokens will be added into the dictionary: - for start mark, for end mark, and for - unknown word. - trg_dict_size(int): Size of the target language dictionary. Three - special tokens will be added into the dictionary: - for start mark, for end mark, and for - unknown word. - src_lang(string): A string indicating which language is the source - language. Available options are: "en" for English - and "de" for Germany. - - Returns: - callable: The validation reader. - """ - if src_lang not in ["en", "de"]: - raise ValueError("An error language type. " - "Only support: en (for English); de(for Germany).") - src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, - src_lang) - - return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), - file_name="wmt16/val", - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang) - - -def get_dict(lang, dict_size, reverse=False): - """ - return the word dictionary for the specified language. - - Args: - lang(string): A string indicating which language is the source - language. Available options are: "en" for English - and "de" for Germany. - dict_size(int): Size of the specified language dictionary. - reverse(bool): If reverse is set to False, the returned python - dictionary will use word as key and use index as value. - If reverse is set to True, the returned python - dictionary will use index as key and word as value. - - Returns: - dict: The word dictionary for the specific language. - """ - - if lang == "en": - dict_size = min(dict_size, TOTAL_EN_WORDS) - else: - dict_size = min(dict_size, TOTAL_DE_WORDS) - - dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, - "wmt16/%s_%d.dict" % (lang, dict_size)) - assert os.path.exists(dict_path), "Word dictionary does not exist. " - "Please invoke paddle.dataset.wmt16.train/test/validation first " - "to build the dictionary." - tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz") - return __load_dict(tar_file, dict_size, lang, reverse) - - -def fetch(): - """download the entire dataset. - """ - paddle.v4.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz") - - -def convert(path, src_dict_size, trg_dict_size, src_lang): - """Converts dataset to recordio format. - """ - - paddle.v2.dataset.common.convert( - path, - train( - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang), - 1000, - "wmt16_train") - paddle.v2.dataset.common.convert( - path, - test( - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang), - 1000, - "wmt16_test") - paddle.v2.dataset.common.convert( - path, - validation( - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang), - 1000, - "wmt16_validation") diff --git a/python/paddle/v2/evaluator.py b/python/paddle/v2/evaluator.py deleted file mode 100644 index eaaadbe53b..0000000000 --- a/python/paddle/v2/evaluator.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.evaluators as evs -from config_base import __convert_to_v2__ -import inspect - -__all__ = [] - - -def initialize(): - def convert_to_new_name(nm): - return nm[:-len("_evaluator")] - - for __ev_name__ in filter(lambda x: x.endswith('_evaluator'), evs.__all__): - __ev__ = getattr(evs, __ev_name__) - __new_name__ = convert_to_new_name(__ev_name__) - - globals()[__new_name__] = __convert_to_v2__(__ev__, __new_name__, - __name__) - globals()[__new_name__].__name__ = __new_name__ - __all__.append(__new_name__) - - -initialize() diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py deleted file mode 100644 index c11aa121c1..0000000000 --- a/python/paddle/v2/event.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Testing and training events. - -There are: - -* TestResult -* BeginIteration -* EndIteration -* BeginPass -* EndPass -""" -__all__ = [ - 'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult', - 'EndForwardBackward' -] - - -class WithMetric(object): - def __init__(self, evaluator): - import py_paddle.swig_paddle as api - if not isinstance(evaluator, api.Evaluator): - raise TypeError("Evaluator should be api.Evaluator type") - self.__evaluator__ = evaluator - - @property - def metrics(self): - names = self.__evaluator__.getNames() - retv = dict() - for each_name in names: - val = self.__evaluator__.getValue(each_name) - retv[each_name] = val - return retv - - -class TestResult(WithMetric): - """ - Result that trainer.test return. - """ - - def __init__(self, evaluator, cost): - super(TestResult, self).__init__(evaluator) - self.cost = cost - - -class BeginPass(object): - """ - Event On One Pass Training Start. - """ - - def __init__(self, pass_id): - self.pass_id = pass_id - - -class EndPass(WithMetric): - """ - Event On One Pass Training Complete. - To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')" - in your event_handler call back - """ - - def __init__(self, pass_id, evaluator, gm): - self.pass_id = pass_id - self.gm = gm - WithMetric.__init__(self, evaluator) - - -class BeginIteration(object): - """ - Event On One Batch Training Start. 
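These event classes are what a trainer's event_handler callback receives; dispatching on the event type is the usual pattern. A hedged sketch of such a callback, with the trainer wiring assumed rather than shown:

.. code-block:: python

    import paddle.v2.event as event

    def event_handler(evt):
        # Called by the trainer with one event object per phase.
        if isinstance(evt, event.EndIteration):
            if evt.batch_id % 100 == 0:
                print("pass %d, batch %d, cost %f" %
                      (evt.pass_id, evt.batch_id, evt.cost))
        elif isinstance(evt, event.EndPass):
            print("pass %d finished, metrics: %s" %
                  (evt.pass_id, evt.metrics))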
- """ - - def __init__(self, pass_id, batch_id): - self.pass_id = pass_id - self.batch_id = batch_id - - -class EndForwardBackward(object): - """ - Event On One Batch ForwardBackward Complete. - """ - - def __init__(self, pass_id, batch_id, gm): - self.pass_id = pass_id - self.batch_id = batch_id - self.gm = gm - - -class EndIteration(WithMetric): - """ - Event On One Batch Training Complete. - To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')" - in your event_handler call back - """ - - def __init__(self, pass_id, batch_id, cost, evaluator, gm): - self.pass_id = pass_id - self.batch_id = batch_id - self.cost = cost - self.gm = gm - WithMetric.__init__(self, evaluator) diff --git a/python/paddle/v2/image.py b/python/paddle/v2/image.py deleted file mode 100644 index 08d8bd68f9..0000000000 --- a/python/paddle/v2/image.py +++ /dev/null @@ -1,380 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This file contains some common interfaces for image preprocess. -Many users are confused about the image layout. We introduce -the image layout as follows. - -- CHW Layout - - - The abbreviations: C=channel, H=Height, W=Width - - The default layout of image opened by cv2 or PIL is HWC. - PaddlePaddle only supports the CHW layout. And CHW is simply - a transpose of HWC. It must transpose the input image. - -- Color format: RGB or BGR - - OpenCV use BGR color format. PIL use RGB color format. Both - formats can be used for training. Noted that, the format should - be keep consistent between the training and inference peroid. -""" -import numpy as np -try: - import cv2 -except ImportError: - cv2 = None -import os -import tarfile -import cPickle - -__all__ = [ - "load_image_bytes", "load_image", "resize_short", "to_chw", "center_crop", - "random_crop", "left_right_flip", "simple_transform", "load_and_transform", - "batch_images_from_tar" -] - - -def batch_images_from_tar(data_file, - dataset_name, - img2label, - num_per_batch=1024): - """ - Read images from tar file and batch them into batch file. 
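The layout discussion above (cv2 and PIL load images as HWC, while Paddle consumes CHW) reduces to a single transpose. In NumPy terms:

.. code-block:: python

    import numpy as np

    hwc = np.zeros((224, 224, 3), dtype=np.uint8)  # as loaded by cv2/PIL
    chw = hwc.transpose((2, 0, 1))                 # what to_chw() does below
    assert chw.shape == (3, 224, 224)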
- - :param data_file: path of image tar file - :type data_file: string - :param dataset_name: 'train','test' or 'valid' - :type dataset_name: string - :param img2label: a dic with image file name as key - and image's label as value - :type img2label: dic - :param num_per_batch: image number per batch file - :type num_per_batch: int - :return: path of list file containing paths of batch file - :rtype: string - """ - batch_dir = data_file + "_batch" - out_path = "%s/%s" % (batch_dir, dataset_name) - meta_file = "%s/%s.txt" % (batch_dir, dataset_name) - - if os.path.exists(out_path): - return meta_file - else: - os.makedirs(out_path) - - tf = tarfile.open(data_file) - mems = tf.getmembers() - data = [] - labels = [] - file_id = 0 - for mem in mems: - if mem.name in img2label: - data.append(tf.extractfile(mem).read()) - labels.append(img2label[mem.name]) - if len(data) == num_per_batch: - output = {} - output['label'] = labels - output['data'] = data - cPickle.dump( - output, - open('%s/batch_%d' % (out_path, file_id), 'w'), - protocol=cPickle.HIGHEST_PROTOCOL) - file_id += 1 - data = [] - labels = [] - if len(data) > 0: - output = {} - output['label'] = labels - output['data'] = data - cPickle.dump( - output, - open('%s/batch_%d' % (out_path, file_id), 'w'), - protocol=cPickle.HIGHEST_PROTOCOL) - - with open(meta_file, 'a') as meta: - for file in os.listdir(out_path): - meta.write(os.path.abspath("%s/%s" % (out_path, file)) + "\n") - return meta_file - - -def load_image_bytes(bytes, is_color=True): - """ - Load an color or gray image from bytes array. - - Example usage: - - .. code-block:: python - - with open('cat.jpg') as f: - im = load_image_bytes(f.read()) - - :param bytes: the input image bytes array. - :type bytes: str - :param is_color: If set is_color True, it will load and - return a color image. Otherwise, it will - load and return a gray image. - :type is_color: bool - """ - flag = 1 if is_color else 0 - file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8) - img = cv2.imdecode(file_bytes, flag) - return img - - -def load_image(file, is_color=True): - """ - Load an color or gray image from the file path. - - Example usage: - - .. code-block:: python - - im = load_image('cat.jpg') - - :param file: the input image path. - :type file: string - :param is_color: If set is_color True, it will load and - return a color image. Otherwise, it will - load and return a gray image. - :type is_color: bool - """ - # cv2.IMAGE_COLOR for OpenCV3 - # cv2.CV_LOAD_IMAGE_COLOR for older OpenCV Version - # cv2.IMAGE_GRAYSCALE for OpenCV3 - # cv2.CV_LOAD_IMAGE_GRAYSCALE for older OpenCV Version - # Here, use constant 1 and 0 - # 1: COLOR, 0: GRAYSCALE - flag = 1 if is_color else 0 - im = cv2.imread(file, flag) - return im - - -def resize_short(im, size): - """ - Resize an image so that the length of shorter edge is size. - - Example usage: - - .. code-block:: python - - im = load_image('cat.jpg') - im = resize_short(im, 256) - - :param im: the input image with HWC layout. - :type im: ndarray - :param size: the shorter edge size of image after resizing. - :type size: int - """ - h, w = im.shape[:2] - h_new, w_new = size, size - if h > w: - h_new = size * h / w - else: - w_new = size * w / h - im = cv2.resize(im, (w_new, h_new), interpolation=cv2.INTER_CUBIC) - return im - - -def to_chw(im, order=(2, 0, 1)): - """ - Transpose the input image order. The image layout is HWC format - opened by cv2 or PIL. Transpose the input image to CHW layout - according the order (2,0,1). - - Example usage: - - .. 
code-block:: python - - im = load_image('cat.jpg') - im = resize_short(im, 256) - im = to_chw(im) - - :param im: the input image with HWC layout. - :type im: ndarray - :param order: the transposed order. - :type order: tuple|list - """ - assert len(im.shape) == len(order) - im = im.transpose(order) - return im - - -def center_crop(im, size, is_color=True): - """ - Crop the center of image with size. - - Example usage: - - .. code-block:: python - - im = center_crop(im, 224) - - :param im: the input image with HWC layout. - :type im: ndarray - :param size: the cropping size. - :type size: int - :param is_color: whether the image is color or not. - :type is_color: bool - """ - h, w = im.shape[:2] - h_start = (h - size) / 2 - w_start = (w - size) / 2 - h_end, w_end = h_start + size, w_start + size - if is_color: - im = im[h_start:h_end, w_start:w_end, :] - else: - im = im[h_start:h_end, w_start:w_end] - return im - - -def random_crop(im, size, is_color=True): - """ - Randomly crop input image with size. - - Example usage: - - .. code-block:: python - - im = random_crop(im, 224) - - :param im: the input image with HWC layout. - :type im: ndarray - :param size: the cropping size. - :type size: int - :param is_color: whether the image is color or not. - :type is_color: bool - """ - h, w = im.shape[:2] - h_start = np.random.randint(0, h - size + 1) - w_start = np.random.randint(0, w - size + 1) - h_end, w_end = h_start + size, w_start + size - if is_color: - im = im[h_start:h_end, w_start:w_end, :] - else: - im = im[h_start:h_end, w_start:w_end] - return im - - -def left_right_flip(im, is_color=True): - """ - Flip an image along the horizontal direction. - Return the flipped image. - - Example usage: - - .. code-block:: python - - im = left_right_flip(im) - - :param im: input image with HWC layout or HW layout for gray image - :type im: ndarray - :param is_color: whether input image is color or not - :type is_color: bool - """ - if len(im.shape) == 3 and is_color: - return im[:, ::-1, :] - else: - return im[:, ::-1] - - -def simple_transform(im, - resize_size, - crop_size, - is_train, - is_color=True, - mean=None): - """ - Simply data argumentation for training. These operations include - resizing, croping and flipping. - - Example usage: - - .. code-block:: python - - im = simple_transform(im, 256, 224, True) - - :param im: The input image with HWC layout. - :type im: ndarray - :param resize_size: The shorter edge length of the resized image. - :type resize_size: int - :param crop_size: The cropping size. - :type crop_size: int - :param is_train: Whether it is training or not. - :type is_train: bool - :param is_color: whether the image is color or not. - :type is_color: bool - :param mean: the mean values, which can be element-wise mean values or - mean values per channel. 
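center_crop and random_crop above differ only in how the crop origin is picked; the slice arithmetic is shared. A compact check of the bounds logic on a dummy HWC image:

.. code-block:: python

    import numpy as np

    im = np.zeros((10, 12, 3))
    size = 8
    h_start = (im.shape[0] - size) // 2   # center_crop origin
    w_start = (im.shape[1] - size) // 2
    crop = im[h_start:h_start + size, w_start:w_start + size, :]
    assert crop.shape == (8, 8, 3)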
- :type mean: numpy array | list - """ - im = resize_short(im, resize_size) - if is_train: - im = random_crop(im, crop_size, is_color=is_color) - if np.random.randint(2) == 0: - im = left_right_flip(im, is_color) - else: - im = center_crop(im, crop_size, is_color=is_color) - if len(im.shape) == 3: - im = to_chw(im) - - im = im.astype('float32') - if mean is not None: - mean = np.array(mean, dtype=np.float32) - # mean value, may be one value per channel - if mean.ndim == 1 and is_color: - mean = mean[:, np.newaxis, np.newaxis] - elif mean.ndim == 1: - mean = mean - else: - # elementwise mean - assert len(mean.shape) == len(im) - im -= mean - - return im - - -def load_and_transform(filename, - resize_size, - crop_size, - is_train, - is_color=True, - mean=None): - """ - Load image from the input file `filename` and transform image for - data argumentation. Please refer to the `simple_transform` interface - for the transform operations. - - Example usage: - - .. code-block:: python - - im = load_and_transform('cat.jpg', 256, 224, True) - - :param filename: The file name of input image. - :type filename: string - :param resize_size: The shorter edge length of the resized image. - :type resize_size: int - :param crop_size: The cropping size. - :type crop_size: int - :param is_train: Whether it is training or not. - :type is_train: bool - :param is_color: whether the image is color or not. - :type is_color: bool - :param mean: the mean values, which can be element-wise mean values or - mean values per channel. - :type mean: numpy array | list - """ - im = load_image(filename, is_color) - im = simple_transform(im, resize_size, crop_size, is_train, is_color, mean) - return im diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py deleted file mode 100644 index 28ee042282..0000000000 --- a/python/paddle/v2/inference.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy -import collections -import topology -import paddle -import cPickle - -__all__ = ['infer', 'Inference'] - - -class Inference(object): - """ - Inference combines neural network output and parameters together - to do inference. - - .. code-block:: python - - inferer = Inference(output_layer=prediction, parameters=parameters) - for data_batch in batches: - print inferer.infer(data_batch) - - - :param output_layer: The neural network that should be inferenced. - :type output_layer: paddle.v2.config_base.Layer or the sequence - of paddle.v2.config_base.Layer - :param parameters: The parameters dictionary. 
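The mean handling at the top of this hunk reshapes a per-channel mean to (C, 1, 1) so it broadcasts across a CHW image. The same broadcasting in isolation:

.. code-block:: python

    import numpy as np

    im = np.ones((3, 4, 4), dtype=np.float32)      # CHW float image
    mean = np.array([0.5, 0.25, 0.125], dtype=np.float32)
    im -= mean[:, np.newaxis, np.newaxis]          # one value per channel
    assert im[0, 0, 0] == 0.5 and im[2, 0, 0] == 0.875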
- :type parameters: paddle.v2.parameters.Parameters - """ - - def __init__(self, parameters, output_layer=None, fileobj=None): - import py_paddle.swig_paddle as api - - if output_layer is not None: - topo = topology.Topology(output_layer) - gm = api.GradientMachine.createFromConfigProto( - topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE]) - self.__data_types__ = topo.data_type() - elif fileobj is not None: - tmp = cPickle.load(fileobj) - gm = api.GradientMachine.createByConfigProtoStr( - tmp['protobin'], api.CREATE_MODE_TESTING, - [api.PARAMETER_VALUE]) - self.__data_types__ = tmp['data_type'] - else: - raise ValueError("Either output_layer or fileobj must be set") - - for param in gm.getParameters(): - val = param.getBuf(api.PARAMETER_VALUE) - name = param.getName() - assert isinstance(val, api.Vector) - val.copyFromNumpyArray(parameters.get(name).flatten()) - # the setValueUpdated function is called in randomize, zeroMem, - # load function in paddle/legacy/parameter/Parameter.cpp. But in the - # inference mode, the setValueUpdated is never called, it will - # cause the parameter will not be dispatched - # in MultiGradientMachine for multi-GPU. So setValueUpdated is - # called here, but it's better to call this function in one place. - param.setValueUpdated() - self.__gradient_machine__ = gm - - def iter_infer(self, input, feeding=None): - from data_feeder import DataFeeder - feeder = DataFeeder(self.__data_types__, feeding) - batch_size = len(input) - - def __reader_impl__(): - for each_sample in input: - yield each_sample - - reader = paddle.batch(__reader_impl__, batch_size=batch_size) - - self.__gradient_machine__.start() - for data_batch in reader(): - yield self.__gradient_machine__.forwardTest(feeder(data_batch)) - self.__gradient_machine__.finish() - - def iter_infer_field(self, field, **kwargs): - if not isinstance(field, list) and not isinstance(field, tuple): - field = [field] - - for result in self.iter_infer(**kwargs): - for each_result in result: - item = [each_result[each_field] for each_field in field] - yield item - - def infer(self, input, field='value', flatten_result=True, **kwargs): - """ - Infer a data by model. - :param input: input data batch. Should be python iterable object. - :param field: output field. - """ - retv = None - kwargs['input'] = input - for result in self.iter_infer_field(field=field, **kwargs): - if retv is None: - retv = [[] for i in xrange(len(result))] - for i, item in enumerate(result): - retv[i].append(item) - - if retv == None: - return [] - - if flatten_result: - retv = [numpy.concatenate(out) for out in retv] - - if len(retv) == 1: - return retv[0] - else: - return retv - - -def infer(output_layer, parameters, input, feeding=None, field='value'): - """ - Infer a neural network by given neural network output and parameters. The - user should pass either a batch of input data or reader method. - - Example usage for sinlge output_layer: - - .. code-block:: python - - result = paddle.infer(output_layer=prediction, - parameters=parameters, - input=SomeData) - print result - - Example usage for multiple outout_layers and fields: - - .. code-block:: python - - result = paddle.infer(output_layer=[prediction1, prediction2], - parameters=parameters, - input=SomeData, - field=[id, value]]) - print result - - :param output_layer: output of the neural network that would be inferred - :type output_layer: paddle.v2.config_base.Layer or a list of - paddle.v2.config_base.Layer - :param parameters: parameters of the neural network. 
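infer above accumulates one list per output field across forward batches, then concatenates each list. The accumulation scheme reduced to plain NumPy, with fabricated batch results:

.. code-block:: python

    import numpy as np

    batch_results = [[np.array([[0.9], [0.1]])], [np.array([[0.4]])]]

    retv = None
    for result in batch_results:
        if retv is None:
            # One accumulator list per output field.
            retv = [[] for _ in range(len(result))]
        for i, item in enumerate(result):
            retv[i].append(item)

    flattened = [np.concatenate(out) for out in retv]
    assert flattened[0].shape == (3, 1)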
- :type parameters: paddle.v2.parameters.Parameters - :param input: input data batch. Should be a python iterable object, and each - element is the data batch. - :type input: collections.Iterable - :param feeding: Reader dictionary. Default could generate from input - value. - :param field: The prediction field. It should in [`value`, `id`, `prob`]. - `value` and `prob` mean return the prediction probabilities, - `id` means return the prediction labels. Default is `value`. - Note that `prob` only used when output_layer is beam_search - or max_id. - :type field: str - :return: The prediction result. If there are multiple outout_layers and fields, - the return order is outout_layer1.field1, outout_layer2.field1, ..., - outout_layer1.field2, outout_layer2.field2 ... - :rtype: numpy.ndarray - """ - - inferer = Inference(output_layer=output_layer, parameters=parameters) - return inferer.infer(field=field, input=input, feeding=feeding) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py deleted file mode 100644 index a188a03eb3..0000000000 --- a/python/paddle/v2/layer.py +++ /dev/null @@ -1,326 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -`paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2, -we want to make Paddle a plain Python package. The model config package defines -the way how to configure a neural network topology in Paddle Python code. - -The primary usage shows below. - -.. code-block:: python - - import paddle - - img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784)) - hidden = paddle.layer.fc(input=img, size=200) - prediction = paddle.layer.fc(input=hidden, size=10, - act=paddle.activation.Softmax()) - - # use prediction instance where needed. 
- parameters = paddle.parameters.create(cost) -""" -import collections -import copy -import re -import paddle.trainer_config_helpers.layers as v1_layers -import paddle.trainer.config_parser as cp -from paddle.proto.ModelConfig_pb2 import ModelConfig, SubModelConfig -from config_base import __convert_to_v2__ -import config_base - -__all__ = ['data', 'parse_network'] - - -def __need_to_keep__(name): - return name in [ - 'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType', - 'layer_support', 'BaseGeneratedInput' - ] - - -def __need_to_wrap__(name): - return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput'] - - -def __convert_name__(inname): - if __need_to_keep__(inname): - return inname - if inname == 'maxid_layer': - return 'max_id' - elif inname.endswith('memory') or inname.endswith( - '_seq') or inname.endswith('_sim') or inname == 'hsigmoid': - return inname - elif inname in [ - 'cross_entropy', 'multi_binary_label_cross_entropy', - 'cross_entropy_with_selfnorm' - ]: - return inname + "_cost" - elif inname.endswith('_cost'): - return inname - elif inname.endswith("_layer"): - return inname[:-len("_layer")] - else: - return inname - - -for name in v1_layers.__all__: - obj = getattr(v1_layers, name) - new_name = __convert_name__(name) - if callable(obj) and __need_to_wrap__(name): - globals()[new_name] = __convert_to_v2__(obj, new_name, __name__) - else: - globals()[new_name] = obj - __all__.append(new_name) - - -def __data_layer__(name, type, **kwargs): - l = v1_layers.data_layer(name, type.dim, **kwargs) - l.data_type = type - return l - - -def __map_data_docstr__(doc): - doc = re.sub(r'(data = [^\)]+)\).*', - "data = paddle.layer.data(name=\"input\", " - "type=paddle.data_type.dense_vector(1000))", doc) - - doc = re.sub(r':param size:.*', ':param type: Data type of this data layer', - doc) - doc = re.sub(r':type size:.*', ":type size: paddle.v2.data_type.InputType", - doc) - return doc - - -__data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__) - -data = __convert_to_v2__(__data_layer__, 'name', __name__) - - -def __get_used_layers__(output_layers): - layer_names = set() - parents = {} - - def add_parent(child, parent): - if child in parents: - parents[child].append(parent) - else: - parents[child] = [parent] - - def add_additional_parents(): - for sub_model in cp.g_config.model_config.sub_models: - if sub_model.name == 'root': - continue - for link in sub_model.in_links: - add_parent(link.link_name, link.layer_name) - add_parent(sub_model.name, link.layer_name) - for link in sub_model.out_links: - add_parent(link.link_name, link.layer_name) - add_parent(link.link_name, sub_model.name) - for mem in sub_model.memories: - if mem.boot_layer_name: - add_parent(mem.layer_name, mem.boot_layer_name) - add_parent(mem.link_name, mem.layer_name) - - if sub_model.HasField('generator'): - # according to the implementation of text generation - # in recurrent layer group, the generated word must be - # the first out link - add_parent(sub_model.out_links[0].layer_name, - sub_model.generator.eos_layer_name) - - def dfs_travel(layer_name): - if layer_name in layer_names: - return - layer_names.add(layer_name) - layer = cp.g_layer_map[layer_name] - - for inp in layer.inputs: - dfs_travel(inp.input_layer_name) - if layer.name in parents: - for p in parents[layer.name]: - dfs_travel(p) - - add_additional_parents() - - for layer in output_layers: - dfs_travel(layer.full_name) - - # print layer needs to be specially handled because no other - # layer depends 
on it. It is used to print the result of some - # layers when running the model for debug purpose. So we explicitly - # add a print layer to the topolty if its input is in the toplogy. - for layer in cp.g_config.model_config.layers: - if layer.type == 'print': - used = True - for inp in layer.inputs: - if inp.input_layer_name not in layer_names: - used = False - break - if used: - layer_names.add(layer.name) - - return layer_names - - -def __get_used_parameters__(layer_names, sub_models): - parameter_names = set() - for name in layer_names: - l = cp.g_layer_map[name] - for inp in l.inputs: - if inp.input_parameter_name: - parameter_names.add(inp.input_parameter_name) - if l.bias_parameter_name: - parameter_names.add(l.bias_parameter_name) - - for sub_model in sub_models: - for mem in sub_model.memories: - if mem.HasField("boot_bias_parameter_name"): - parameter_names.add(mem.boot_bias_parameter_name) - - return parameter_names - - -def __get_used_submodels__(layer_names): - submodel_names = set() - for submodel in cp.g_config.model_config.sub_models: - if submodel.name in layer_names: - submodel_names.add(submodel.name) - return submodel_names - - -def __get_submodel_data_out_links__(): - data_links = set() - for submodel in cp.g_config.model_config.sub_models: - for link in submodel.out_links: - if cp.g_layer_map[link.link_name].type == 'data': - data_links.add(link.link_name) - return data_links - - -def __get_used_evaluators__(layer_names): - evaluator_names = set() - for e in cp.g_config.model_config.evaluators: - used = True - for name in e.input_layers: - if name not in layer_names: - used = False - break - if used: - evaluator_names.add(e.name) - return evaluator_names - - -def __trim_submodel__(old_submodel, layer_names, input_layer_names, - output_layer_names, evaluator_names): - - submodel = SubModelConfig() - submodel.name = old_submodel.name - submodel.layer_names.extend( - filter(lambda x: x in layer_names, old_submodel.layer_names)) - submodel.input_layer_names.extend( - filter(lambda x: x in input_layer_names, submodel.layer_names)) - submodel.output_layer_names.extend( - filter(lambda x: x in output_layer_names, submodel.layer_names)) - submodel.evaluator_names.extend( - filter(lambda x: x in evaluator_names, old_submodel.evaluator_names)) - - submodel.is_recurrent_layer_group = old_submodel.is_recurrent_layer_group - submodel.reversed = old_submodel.reversed - - submodel.memories.extend( - filter(lambda x: x.link_name in layer_names, old_submodel.memories)) - target_inlinkid = (old_submodel.target_inlinkid - if old_submodel.HasField('target_inlinkid') else -1) - in_links = [] - for i, link in enumerate(old_submodel.in_links): - if link.link_name in layer_names or i == target_inlinkid: - in_links.append(link) - if i == target_inlinkid: - target_inlinkid = len(in_links) - 1 - submodel.in_links.extend(in_links) - - submodel.out_links.extend( - filter(lambda x: x.link_name in layer_names, old_submodel.out_links)) - if old_submodel.HasField('generator'): - submodel.generator.CopyFrom(old_submodel.generator) - - if old_submodel.HasField('target_inlinkid'): - submodel.target_inlinkid = target_inlinkid - return submodel - - -def parse_network(output_layers, extra_layers=None): - if not isinstance(output_layers, collections.Sequence): - output_layers = [output_layers] - if extra_layers is not None: - if not isinstance(extra_layers, collections.Sequence): - extra_layers = [extra_layers] - else: - extra_layers = [] - - layer_names = __get_used_layers__(list(output_layers) + 
list(extra_layers)) - submodel_names = __get_used_submodels__(layer_names) - submodel_names.add('root') - evaluator_names = __get_used_evaluators__(layer_names) - data_out_links = __get_submodel_data_out_links__() - input_layer_names = set() - output_layer_names = set() - - model_config = ModelConfig() - model_config.type = cp.g_config.model_config.type - - for layer in output_layers: - model_config.output_layer_names.append(layer.full_name) - output_layer_names.add(layer.full_name) - - for l in cp.g_config.model_config.layers: - if l.name not in layer_names: - continue - model_config.layers.extend([l]) - if l.type == 'data': - if l.name in data_out_links: - """ - In text generation, the outlink to save the generated word - indices is a data_layer defined in recurrent_group. This - data_layer is sure to be the output of the network in text - generation task, so this statement excludes such a special - data_layer from being inputs of the network, otherwise an error - will occur during data feeding. - """ - continue - model_config.input_layer_names.append(l.name) - input_layer_names.add(l.name) - - for e in cp.g_config.model_config.evaluators: - if e.name in evaluator_names: - model_config.evaluators.extend([e]) - - for s in cp.g_config.model_config.sub_models: - if s.name in submodel_names: - s = __trim_submodel__(s, layer_names, input_layer_names, - output_layer_names, evaluator_names) - model_config.sub_models.extend([s]) - - parameter_names = __get_used_parameters__(layer_names, - model_config.sub_models) - - for p in cp.g_config.model_config.parameters: - if p.name in parameter_names: - model_config.parameters.extend([p]) - - return model_config - - -def get_layer(name): - return config_base.__layer_map__.get(name) diff --git a/python/paddle/v2/master/.gitignore b/python/paddle/v2/master/.gitignore deleted file mode 100644 index a3ac6e1a33..0000000000 --- a/python/paddle/v2/master/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.whl -*.so -*.pyc diff --git a/python/paddle/v2/master/__init__.py b/python/paddle/v2/master/__init__.py deleted file mode 100644 index efaeeabfa2..0000000000 --- a/python/paddle/v2/master/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from client import * - -__all__ = ['client'] diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py deleted file mode 100644 index d62e7cc28e..0000000000 --- a/python/paddle/v2/master/client.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
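A minimal sketch of how `parse_network` above was typically driven from the v2 API. The layer names, sizes, and exact module paths below are illustrative assumptions, not part of this patch:

.. code-block:: python

    # Sketch only: assumes the legacy paddle.v2 layer API defined above.
    import paddle.v2 as paddle

    img = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(784))
    fc = paddle.layer.fc(
        input=img, size=10, act=paddle.activation.Softmax())

    # parse_network returns a trimmed ModelConfig protobuf that keeps only
    # the layers, parameters, evaluators and sub-models reachable from fc.
    model_proto = paddle.layer.parse_network(fc)
    print model_proto.input_layer_names    # e.g. ['image']
    print model_proto.output_layer_names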
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import ctypes -import os - -__lib__ = None - - -def get_c_lib(): - global __lib__ - if __lib__ is None: - path = os.path.join(os.path.dirname(__file__), "libpaddle_master.so") - __lib__ = ctypes.cdll.LoadLibrary(path) - return __lib__ - - -class client(object): - """ - client is a client to the master server. - """ - - def __init__(self, etcd_endpoints, timeout_sec, buf_size=0): - self.c = get_c_lib().paddle_new_etcd_master_client( - etcd_endpoints, timeout_sec, buf_size) - - def request_save_model(self, trainer_id, block_ms): - """Request permission to save the model. - - Conventionally the 0-th trainer saves the model. But in - distributed training, any trainer could be killed. This - function asks the master server whether this trainer should - proceed with saving the model. - - :param trainer_id: trainer id. - :param block_ms: number of milliseconds for which other save-model - requests will be blocked if this request succeeds. - - Returns: - int: 1 if the save-model request is approved, 0 if the - request is rejected because another trainer is saving - the model, -1 if an error happened. - - """ - return get_c_lib().paddle_request_save_model(self.c, trainer_id, - block_ms) - - def release(self): - get_c_lib().paddle_release_master_client(self.c) - self.c = None - - def set_dataset(self, paths): - holder_type = ctypes.c_char_p * len(paths) - holder = holder_type() - for idx, path in enumerate(paths): - c_ptr = ctypes.c_char_p(path) - holder[idx] = c_ptr - get_c_lib().paddle_set_dataset(self.c, holder, len(paths)) - - def next_record(self): - """Gets the next record for training. - - Returns: - string: the record. - int: error code, 0 if successful, < 0 otherwise. - """ - p = ctypes.c_char_p() - ret = ctypes.pointer(p) - size = get_c_lib().paddle_next_record(self.c, ret) - if size < 0: - # Error - return None, size - - if size == 0: - # Empty record - return "", 0 - - record = ret.contents.value[:size] - # Memory created from C should be freed. - get_c_lib().mem_free(ret.contents) - return record, 0 - - def paddle_start_get_records(self, pass_id): - get_c_lib().paddle_start_get_records(self.c, pass_id) diff --git a/python/paddle/v2/minibatch.py b/python/paddle/v2/minibatch.py deleted file mode 100644 index 3c6a53db3c..0000000000 --- a/python/paddle/v2/minibatch.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['batch'] - - -def batch(reader, batch_size, drop_last=True): - """ - Create a batched reader. - - :param reader: the data reader to read from.
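A sketch of driving the master `client` above directly. The etcd endpoint and dataset glob are placeholders; the pickled record format follows the `cloud_reader` helper later in this patch:

.. code-block:: python

    # Sketch only: mirrors how cloud_reader (below) uses the client above.
    import cPickle as pickle
    import paddle.v2.master as master

    c = master.client("http://127.0.0.1:2379", timeout_sec=5, buf_size=64)
    c.set_dataset(["/work/dataset/uci_housing/uci_housing*"])
    c.paddle_start_get_records(0)    # start fetching records for pass 0
    while True:
        record, err = c.next_record()
        if not record:
            break                    # err < 0 reports an error, 0 is success
        sample = pickle.loads(record)
    c.release()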
- :type reader: callable - :param batch_size: size of each mini-batch - :type batch_size: int - :param drop_last: drop the last batch, if the size of last batch is not equal to batch_size. - :type drop_last: bool - :return: the batched reader. - :rtype: callable - """ - - def batch_reader(): - r = reader() - b = [] - for instance in r: - b.append(instance) - if len(b) == batch_size: - yield b - b = [] - if drop_last == False and len(b) != 0: - yield b - - return batch_reader diff --git a/python/paddle/v2/networks.py b/python/paddle/v2/networks.py deleted file mode 100644 index 8ae9f3b202..0000000000 --- a/python/paddle/v2/networks.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.networks as conf_nw -import inspect -from config_base import __convert_to_v2__ - -__all__ = [] - - -def __initialize__(): - for each_subnetwork in conf_nw.__all__: - if each_subnetwork in ['inputs', 'outputs']: - continue - func = getattr(conf_nw, each_subnetwork) - globals()[each_subnetwork] = func - globals()[each_subnetwork].__name__ = each_subnetwork - global __all__ - __all__.append(each_subnetwork) - - -__initialize__() diff --git a/python/paddle/v2/op.py b/python/paddle/v2/op.py deleted file mode 100644 index 03f3b9b9ef..0000000000 --- a/python/paddle/v2/op.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
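The `batch()` reader above groups single items into mini-batches; a short sketch of its behavior:

.. code-block:: python

    # Sketch: batch is the function from the deleted minibatch.py above.
    def counting_reader():
        for i in range(10):
            yield i

    batched = batch(counting_reader, batch_size=4)
    for b in batched():
        print b    # [0, 1, 2, 3] then [4, 5, 6, 7]; the trailing two items
                   # are dropped because drop_last defaults to True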
- -import layer -import activation as act -from config_base import Layer -from paddle.trainer_config_helpers.attrs import is_compatible_with -from paddle.trainer_config_helpers.default_decorators import wrap_name_default - -__all__ = [] - - -def __register_unary_math_op__(op_name, act): - def op(input, name=None): - return layer.mixed( - input=[layer.identity_projection(input=input)], name=name, act=act) - - op = wrap_name_default(op_name)(op) - op.__doc__ = type(act).__doc__ - globals()[op_name] = op - __all__.append(op_name) - - -__register_unary_math_op__('exp', act.Exp()) -__register_unary_math_op__('log', act.Log()) -__register_unary_math_op__('abs', act.Abs()) -__register_unary_math_op__('sigmoid', act.Sigmoid()) -__register_unary_math_op__('tanh', act.Tanh()) -__register_unary_math_op__('square', act.Square()) -__register_unary_math_op__('relu', act.Relu()) -__register_unary_math_op__('sqrt', act.Sqrt()) -__register_unary_math_op__('reciprocal', act.Reciprocal()) -__register_unary_math_op__('softmax', act.Softmax()) - - -def __add__(layeroutput, other): - if is_compatible_with(other, float): - return layer.slope_intercept(input=layeroutput, intercept=other) - if not isinstance(other, Layer): - raise TypeError("Layer can only be added with" - " another Layer or a number") - if layeroutput.size == other.size: - return layer.mixed(input=[ - layer.identity_projection(input=layeroutput), - layer.identity_projection(input=other) - ]) - if other.size != 1 and layeroutput.size != 1: - raise TypeError("Two Layer can be added only if they have equal size" - " or one of their sizes is 1. sizes are %s and %s" % - (layeroutput.size, other.size)) - elif layeroutput.size == 1: - tmp = layeroutput - layeroutput = other - other = tmp - other = layer.repeat(other, layeroutput.size) - return layer.mixed(input=[ - layer.identity_projection(input=layeroutput), - layer.identity_projection(input=other) - ]) - - -Layer.__radd__ = __add__ -Layer.__add__ = __add__ - - -def __neg__(layeroutput): - return layer.slope_intercept(input=layeroutput, slope=-1.0) - - -Layer.__neg__ = __neg__ - - -def __sub__(layeroutput, other): - if is_compatible_with(other, float): - return layer.slope_intercept(input=layeroutput, intercept=other) - if not isinstance(other, Layer): - raise TypeError("Layer can only be subtracted with" - " another Layeroutput or a number") - return __add__(layeroutput, -other) - - -Layer.__sub__ = __sub__ - - -def __rsub__(layeroutput, other): - neg = layer.slope_intercept(input=layeroutput, slope=-1.0) - return __add__(neg, other) - - -Layer.__rsub__ = __rsub__ - - -def __mul__(layeroutput, other): - if is_compatible_with(other, float): - return layer.slope_intercept(input=layeroutput, slope=other) - if not isinstance(other, Layer): - raise TypeError("Layer can only be multiplied with" - " another Layer or a number") - elif layeroutput.size == 1: - return layer.scaling(input=other, weight=layeroutput) - elif other.size == 1: - return layer.scaling(input=layeroutput, weight=other) - else: - raise TypeError("At least one of the operand of '*' must be a number" - " or a Layer with size=1") - - -Layer.__mul__ = __mul__ -Layer.__rmul__ = __mul__ diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py deleted file mode 100644 index caef5f484e..0000000000 --- a/python/paddle/v2/optimizer.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
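With the overloads registered above, v2 layers compose with ordinary Python arithmetic. A sketch with assumed layer names; where the unary helpers such as `exp` end up exposed is also an assumption:

.. code-block:: python

    # Sketch only: relies on the operator overloads installed above.
    import paddle.v2 as paddle

    x = paddle.layer.data(name="x", type=paddle.data_type.dense_vector(128))
    y = paddle.layer.data(name="y", type=paddle.data_type.dense_vector(128))

    s = x + y          # mixed layer over two identity projections
    d = x - y          # rewritten as x + (-1.0 * y) via slope_intercept
    t = 2.0 * x        # slope_intercept with slope=2.0
    e = paddle.layer.exp(x)    # unary op registered by
                               # __register_unary_math_op__ (path assumed)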
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.trainer_config_helpers.optimizers as v1_optimizers -from paddle.proto.OptimizerConfig_pb2 import OptimizerConfig - -__all__ = [ - 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', - 'RMSProp', 'ModelAverage', 'L2Regularization' -] - - -class Optimizer(object): - def __init__(self, **kwargs): - import py_paddle.swig_paddle as swig_api - if 'batch_size' in kwargs: - del kwargs['batch_size'] # not important for python library. - - def __impl__(): - v1_optimizers.settings(batch_size=1, **kwargs) - - self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config( - __impl__) - self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto( - self.__opt_conf_proto__) - - def enable_types(self): - """ - get enable_types for each optimizer. - enable_types = [value, gradient, momentum, etc] - For each optimizer(SGD, Adam), GradientMachine should enable different - buffers. - """ - import py_paddle.swig_paddle as swig_api - tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__) - assert isinstance(tmp, swig_api.ParameterOptimizer) - return tmp.getParameterTypes() - - def __create_local_updater__(self): - import py_paddle.swig_paddle as swig_api - return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__) - - def __create_remote_updater__(self, pass_num, use_sparse_updater): - import py_paddle.swig_paddle as swig_api - return swig_api.ParameterUpdater.createRemoteUpdater( - self.__opt_conf__, pass_num, use_sparse_updater) - - def __create_new_remote_updater__(self, pserver_spec, use_etcd): - import py_paddle.swig_paddle as swig_api - return swig_api.ParameterUpdater.createNewRemoteUpdater( - self.__opt_conf__, pserver_spec, use_etcd) - - def create_updater(self, is_local, num_passes, use_sparse_updater, - pserver_spec, use_etcd): - """ - create proper parameter_updater by configuration. - :param is_local: create local or remote parameter updater - :param num_passes: remote parameter updater will use this to config - parameter server. - :param use_sparse_updater: when use remote updater, if some parameter is - sparse, updater should do some extra thing: - - .. code-block:: python - - if use_sparse_remote_updater: - gradient_machine.prefetch(in_args) - parameter_updater.getParametersRemote() - - :param pserver_spec: pserver location, eg: localhost:3000, if use etcd, - pserver_spec should be the etcd endpoints, eg: http://localhost:2379 - :return: parameter_updater - """ - if is_local: - parameter_updater = self.__create_local_updater__() - else: - if pserver_spec is None: - parameter_updater = self.__create_remote_updater__( - num_passes, use_sparse_updater) - else: - parameter_updater = self.__create_new_remote_updater__( - pserver_spec, use_etcd) - return parameter_updater - - -class Momentum(Optimizer): - """ - Momentum Optimizer. 
- - When sparse=False, the momentum update formula is as follows: - - .. math:: - - v_{t} &= k * v_{t-1} - \\gamma_t (g_{t} + \\lambda w_{t-1}) \\\\ - w_{t} &= w_{t-1} + v_{t} \\\\ - - where, :math:`k` is momentum, :math:`\\lambda` is decay rate, - :math:`\\gamma_t` is learning rate at the t'th iteration. - :math:`w_{t}` is the weight as the t'th iteration. - And the :math:`v_{t}` is the history momentum variable. - - When sparse=True, the update scheme: - - .. math:: - - \\alpha_t &= \\alpha_{t-1} / k \\\\ - \\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\ - u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\ - v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\ - \\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t - - where :math:`k` is momentum, :math:`\\lambda` is decay rate, - :math:`\\gamma_t` is learning rate at the t'th iteration. - - :param momentum: the momentum factor. - :type momentum: float - :param sparse: with sparse support or not, False by default. - :type sparse: bool - """ - - def __init__(self, momentum=None, sparse=False, **kwargs): - learning_method = v1_optimizers.MomentumOptimizer( - momentum=momentum, sparse=sparse) - super(Momentum, self).__init__( - learning_method=learning_method, **kwargs) - - -class Adam(Optimizer): - """ - Adam optimizer. - The details of please refer `Adam: A Method for Stochastic Optimization - `_ - - .. math:: - - m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\ - v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\ - w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}} - - :param beta1: the :math:`\\beta_1` in equation. - :type beta1: float - :param beta2: the :math:`\\beta_2` in equation. - :type beta2: float - :param epsilon: the :math:`\\epsilon` in equation. It is used to prevent - divided by zero. - :type epsilon: float - """ - - def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs): - learning_method = v1_optimizers.AdamOptimizer( - beta1=beta1, beta2=beta2, epsilon=epsilon) - super(Adam, self).__init__(learning_method=learning_method, **kwargs) - - -class Adamax(Optimizer): - """ - Adamax optimizer. - - The details of please refer this `Adam: A Method for Stochastic Optimization - `_ - - .. math:: - - m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\ - u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\ - w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t - - :param beta1: the :math:`\\beta_1` in the equation. - :type beta1: float - :param beta2: the :math:`\\beta_2` in the equation. - :type beta2: float - """ - - def __init__(self, beta1=0.9, beta2=0.999, **kwargs): - learning_method = v1_optimizers.AdamaxOptimizer( - beta1=beta1, beta2=beta2) - super(Adamax, self).__init__(learning_method=learning_method, **kwargs) - - -class AdaGrad(Optimizer): - """ - Adagrad(for ADAptive GRAdient algorithm) optimizer. - - For details please refer this `Adaptive Subgradient Methods for - Online Learning and Stochastic Optimization - `_. - - .. math:: - - G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\ - w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g - """ - - def __init__(self, **kwargs): - learning_method = v1_optimizers.AdaGradOptimizer() - super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs) - - -class DecayedAdaGrad(Optimizer): - """ - AdaGrad method with decayed sum gradients. The equations of this method - show as follow. - - .. 
math:: - - E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\ - learning\\_rate &= 1/sqrt( E(g_t^2) + \\epsilon ) - - :param rho: The :math:`\\rho` parameter in that equation - :type rho: float - :param epsilon: The :math:`\\epsilon` parameter in that equation. - :type epsilon: float - """ - - def __init__(self, rho=0.95, epsilon=1e-06, **kwargs): - learning_method = v1_optimizers.DecayedAdaGradOptimizer( - rho=rho, epsilon=epsilon) - super(DecayedAdaGrad, self).__init__( - learning_method=learning_method, **kwargs) - - -class AdaDelta(Optimizer): - """ - AdaDelta method. For details of AdaDelta, please refer to - `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD - `_. - - .. math:: - - E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\ - learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\ - E(g_t^2) + \\epsilon ) ) \\\\ - E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2 - - :param rho: :math:`\\rho` in the equation - :type rho: float - :param epsilon: :math:`\\epsilon` in the equation - :type epsilon: float - """ - - def __init__(self, rho=0.95, epsilon=1e-06, **kwargs): - learning_method = v1_optimizers.AdaDeltaOptimizer( - rho=rho, epsilon=epsilon) - super(AdaDelta, self).__init__( - learning_method=learning_method, **kwargs) - - -class RMSProp(Optimizer): - """ - RMSProp(for Root Mean Square Propagation) optimizer. For details please - refer to this `slide `_. - - The equations of this method are as follows: - - .. math:: - - v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\ - w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w) - - :param rho: the :math:`\\rho` in the equation. The forgetting factor. - :type rho: float - :param epsilon: the :math:`\\epsilon` in the equation. - :type epsilon: float - """ - - def __init__(self, rho=0.95, epsilon=1e-6, **kwargs): - learning_method = v1_optimizers.RMSPropOptimizer( - rho=rho, epsilon=epsilon) - super(RMSProp, self).__init__(learning_method=learning_method, **kwargs) - - -ModelAverage = v1_optimizers.ModelAverage -L2Regularization = v1_optimizers.L2Regularization - -if __name__ == '__main__': - import py_paddle.swig_paddle as swig_api - swig_api.initPaddle('--use_gpu=false') - for opt in [ - Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(), - AdaDelta(), RMSProp(), Adam( - model_average=ModelAverage(average_window=0.5), - regularization=L2Regularization(rate=0.5), - gradient_clipping_threshold=25) - ]: - print opt, opt.enable_types() diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py deleted file mode 100644 index 7b7d1a1d16..0000000000 --- a/python/paddle/v2/parameters.py +++ /dev/null @@ -1,441 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
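Constructing the deleted v2 optimizers mirrors the `__main__` demo above; keyword arguments not consumed by a subclass (for example `learning_rate`, an assumption here) are forwarded to the v1 `settings()` call by `Optimizer.__init__`:

.. code-block:: python

    # Sketch: follows the __main__ demo at the end of optimizer.py above.
    import py_paddle.swig_paddle as swig_api
    swig_api.initPaddle('--use_gpu=false')   # required before enable_types()

    optimizer = Adam(
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-8,
        learning_rate=1e-3,                  # forwarded to settings()
        model_average=ModelAverage(average_window=0.5),
        regularization=L2Regularization(rate=0.5),
        gradient_clipping_threshold=25)
    print optimizer.enable_types()           # buffers a GradientMachine needs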
- -import numpy as np -from paddle.proto.ParameterConfig_pb2 import ParameterConfig -from collections import OrderedDict -import paddle.trainer.config_parser as cp -import struct -import tarfile -import cStringIO -from topology import Topology - -__all__ = ['Parameters', 'create'] - - -def create(layers): - """ - Create a parameter pool from the network topology. - - :param layers: - :return: - """ - topology = Topology(layers) - pool = Parameters() - initializers = cp.g_parameter_initializer_map - for param in topology.proto().parameters: - pool.__append_config__(param) - if param.name in initializers: - pool[param.name] = initializers[param.name](param.name) - return pool - - -class Parameters(object): - """ - `Parameters` manages all the learnable parameters in a neural network. - It stores parameters' information in an OrderedDict. The key is - the name of a parameter, and value is a parameter's configuration (in - protobuf format), such as initialization mean and std, its size, whether it - is a static parameter, and so on. - - :param __param_conf__: store the configurations of learnable parameters in - the network in an OrderedDict. Parameter is added one by one into the - dict by following their created order in the network: parameters of - the previous layers in a network are created first. You can visit the - parameters from bottom to top by iterating over this dict. - :type __param_conf__: OrderedDict - :param __gradient_machines__: all of the parameters in a neural network are - appended to a PaddlePaddle gradient machine, which is used internally to - copy parameter values between the C++ and Python ends. - :type __gradient_machines__: list - :param __tmp_params__: a dict to store dummy parameters if no - __gradient_machines__ is appended to `Parameters`. - :type __tmp_params__: dict - - Basic usage is - - .. code-block:: python - - data = paddle.layers.data(...) - ... - out = paddle.layers.fc(...) - - parameters = paddle.parameters.create(out) - - parameter_names = parameters.names() - fc_mat = parameters.get('fc') - print fc_mat - """ - - def __init__(self): - self.__param_conf__ = OrderedDict() - self.__gradient_machines__ = [] - self.__tmp_params__ = dict() - - def __append_config__(self, param_conf): - """ - Append a parameter configuration. It is used to initialize Parameters - and should be invoked only in paddle.parameters.create. - - :param param_conf: The parameter configuration in protobuf - :type param_conf: ParameterConfig - :return: Nothing - """ - - if not isinstance(param_conf, ParameterConfig): - raise ValueError("param_conf must be paddle.proto.ParameterConfig") - - if param_conf.name in self.__param_conf__: - raise ValueError("duplicated parameter %s" % param_conf.name) - - self.__param_conf__[param_conf.name] = param_conf - - def update_param_conf(self, model_config): - for p in model_config.parameters: - self.__param_conf__[p.name] = p - - def keys(self): - """ - keys are the names of each parameter. - - :return: list of parameter name - :rtype: list - """ - return self.__param_conf__.keys() - - def names(self): - """ - names of each parameter. - - :return: list of parameter name - :rtype: list - """ - return self.keys() - - def has_key(self, key): - """ - has_key returns True if there is a parameter whose name equals key. - - :param key: Parameter name - :type key: basestring - :return: True if contains such key - """ - return key in self.__param_conf__.keys() - - def __iter__(self): - """ - Return an iterator of parameter name. It is used by the `for` loop - or the `in` operator. - - .. 
code-block:: python - - parameters = paddle.parameters.create(...) - if "fc_param" in parameters: - print 'OK' - :return: an iterator of parameter name - :rtype: iterator - """ - return iter(self.__param_conf__) - - def __getter_inner(self, key, param_type): - import py_paddle.swig_paddle as api - shape = self.get_shape(key) - - if len(self.__gradient_machines__) == 0: - # create new parameter in python numpy. - if key in self.__tmp_params__: - return self.__tmp_params__[key] - else: - return np.ndarray(shape=shape, dtype=np.float32) - else: - for each_gradient_machine in self.__gradient_machines__: - param = __get_parameter_in_gradient_machine__( - each_gradient_machine, key) - # to simplify the implementation for now, we always copy from C++ - assert isinstance(param, api.Parameter) - val = param.getBuf(param_type) - assert isinstance(val, api.Vector) - val = val.copyToNumpyArray() - return val - # else continue - - raise RuntimeError("Unexpected branch") - - def __getitem__(self, key): - """ - Get parameter by parameter name. It uses Python dict syntax. - - :note: It will always copy the parameter from C++ side. - :param key: Parameter name - :type key: basestring - :return: parameter value - :rtype: np.ndarray - """ - import py_paddle.swig_paddle as api - return self.__getter_inner(key, api.PARAMETER_VALUE) - - def get_shape(self, key): - """ - Get the shape of the parameter. - - :param key: parameter name - :type key: basestring - :return: parameter's shape - :rtype: tuple - """ - if not isinstance(key, basestring): - raise ValueError("parameter name should be string") - if not self.has_key(key): - raise ValueError("No such parameter %s" % key) - conf = self.__param_conf__[key] - dims = conf.dims if conf.dims else (1, conf.size) - return tuple(map(int, dims)) - - def __setitem__(self, key, value): - """ - Set parameter by parameter name & value. It uses Python dict syntax. - - :note: It will always copy the parameter to C++ side. - :param key: Parameter name - :type key: basestring - :param value: Parameter matrix. - :type value: np.ndarray - :return: Nothing - """ - - if not isinstance(value, np.ndarray): - raise ValueError("value must be a numpy ndarray") - value = value.astype(dtype=np.float32) - shape = self.get_shape(key) - if value.shape != shape: - raise ValueError("Value shape mismatch, expected %s, got %s" % - (shape, value.shape)) - - if len(self.__gradient_machines__) == 0: - self.__tmp_params__[key] = value - else: - for each_gradient_machine in self.__gradient_machines__: - __copy_parameter_to_gradient_machine__(each_gradient_machine, - key, value) - - def get(self, parameter_name): - """ - Get parameter by parameter name. - - :note: It will always copy the parameter from C++ side. - :param parameter_name: parameter name - :type parameter_name: basestring - :return: The parameter matrix. - :rtype: np.ndarray - """ - return self.__getitem__(key=parameter_name) - - def get_grad(self, key): - """ - Get gradient by parameter name. - - :note: It will always copy the parameter from C++ side. - :param key: parameter name - :type key: basestring - :return: The gradient matrix. - :rtype: np.ndarray - """ - import py_paddle.swig_paddle as api - if self.__param_conf__[key].is_static: - return np.zeros(self.__param_conf__[key].size, dtype=np.float32) - - return self.__getter_inner(key, api.PARAMETER_GRADIENT) - - def set(self, parameter_name, value): - """ - Set parameter by parameter name & matrix. 
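Reading and writing a parameter through the dict-style accessors above; `cost` stands in for a network output layer:

.. code-block:: python

    # Sketch: uses the __getitem__/__setitem__ accessors defined above.
    parameters = paddle.parameters.create(cost)
    name = parameters.names()[0]
    w = parameters.get(name)        # always a float32 numpy copy
    parameters.set(name, w * 0.0)   # shape must match get_shape(name)
    g = parameters.get_grad(name)   # zeros if the parameter is static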
- - :param parameter_name: parameter name - :type parameter_name: basestring - :param value: parameter matrix - :type value: np.ndarray - :return: Nothing. - """ - self.__setitem__(key=parameter_name, value=value) - - def append_gradient_machine(self, gradient_machine): - """ - append gradient machine to parameters. This method is used internally in - Trainer.train. - - :param gradient_machine: PaddlePaddle C++ GradientMachine object. - :type gradient_machine: api.GradientMachine - :return: - """ - import py_paddle.swig_paddle as api - if not isinstance(gradient_machine, api.GradientMachine): - raise ValueError("gradient_machine should be api.GradientMachine") - - if len(self.__tmp_params__) != 0: - for name, val in self.__tmp_params__.iteritems(): - try: - __copy_parameter_to_gradient_machine__(gradient_machine, - name, val) - except ValueError: - # If no such parameter in gradient machine, then don't copy - pass - - self.__gradient_machines__.append(gradient_machine) - - def serialize(self, name, f): - """ - - :param name: - :param f: - :type f: file - :return: - """ - param = self.get(name) - size = reduce(lambda a, b: a * b, param.shape) - f.write(struct.pack("IIQ", 0, 4, size)) - param = param.astype(np.float32) - s = param.tostring() - wrote_size = 0 - buf = buffer(s, wrote_size, 65535) - while buf: # f.write crashes with big data blog. - f.write(buf) - wrote_size += 65535 - buf = buffer(s, wrote_size, 65535) - - def deserialize(self, name, f): - """ - - :param name: - :param f: - :type f: file - :return: - """ - f.read(16) # header - arr = np.frombuffer(f.read(), dtype=np.float32) - self.set(name, arr.reshape(self.get_shape(name))) - - def to_tar(self, f): - """ - Save parameters to a tar file. - - WARNING: You should use `paddle.v2.trainer.SGD.save_parameter_to_tar(f)` - to save parameters most of the time. Otherwise, some settings such - as model average will not take effect. - - :param f: - :type f: file - :return: - """ - tar = tarfile.TarFile(fileobj=f, mode='w') - for nm in self.names(): - buf = cStringIO.StringIO() - self.serialize(nm, buf) - tarinfo = tarfile.TarInfo(name=nm) - buf.seek(0) - tarinfo.size = len(buf.getvalue()) - tar.addfile(tarinfo, buf) - - conf = self.__param_conf__[nm] - confStr = conf.SerializeToString() - tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm) - tarinfo.size = len(confStr) - buf = cStringIO.StringIO(confStr) - buf.seek(0) - tar.addfile(tarinfo, fileobj=buf) - - @staticmethod - def from_tar(f): - """ - Create a `Parameters` object from the given file. And - the `Parameters` only contains the parameters in this - file. It is adapted the parameters are same in the - defined network and the given file. For example, it - can be used in the inference. - - :param f: the initialized model file. - :type f: tar file - :return: A Parameters object. - :rtype: Parameters. - """ - params = Parameters() - tar = tarfile.TarFile(fileobj=f, mode='r') - for finfo in tar: - assert isinstance(finfo, tarfile.TarInfo) - if finfo.name.endswith('.protobuf'): - f = tar.extractfile(finfo) - conf = ParameterConfig() - conf.ParseFromString(f.read()) - params.__append_config__(conf) - - for param_name in params.names(): - f = tar.extractfile(param_name) - params.deserialize(param_name, f) - return params - - def init_from_tar(self, f, exclude_params=[]): - """ - Different from `from_tar`, this interface can be used to - init partial network parameters from another saved model. - - :param f: the initialized model file. 
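A round trip through the tar helpers above; the file name is a placeholder (for saving during training, the warning above recommends `SGD.save_parameter_to_tar` instead):

.. code-block:: python

    # Sketch: to_tar/from_tar are the methods defined above.
    with open("params.tar", "wb") as f:
        parameters.to_tar(f)        # writes values plus ParameterConfig

    with open("params.tar", "rb") as f:
        restored = Parameters.from_tar(f)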
- :type f: tar file - :param exclude_params: the names of parameters that should - not be initialized from the model file. - :type exclude_params: list of strings - :return: Nothing. - """ - - tar_param = Parameters.from_tar(f) - for pname in tar_param.names(): - if pname in self.names() and pname not in exclude_params: - self.set(pname, tar_param.get(pname)) - - -def __get_parameter_in_gradient_machine__(gradient_machine, name): - """ - - :param gradient_machine: - :type gradient_machine: api.GradientMachine - :param name: - :return: - :rtype: api.Parameter - """ - params = filter(lambda p: p.getName() == name, - gradient_machine.getParameters()) - - if len(params) == 0: - raise ValueError("No such parameter") - elif len(params) > 1: - raise ValueError("Unexpected branch") - else: - return params[0] - - -def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr): - """ - Copy a python ndarray into the gradient machine. - - :param gradient_machine: - :type gradient_machine: api.GradientMachine - :param name: - :param arr: - :type arr: np.ndarray - :return: - :rtype: api.Parameter - """ - import py_paddle.swig_paddle as api - param = __get_parameter_in_gradient_machine__(gradient_machine, name) - vec = param.getBuf(api.PARAMETER_VALUE) - assert isinstance(vec, api.Vector) - vec.copyFromNumpyArray(arr.flatten()) diff --git a/python/paddle/v2/plot/__init__.py b/python/paddle/v2/plot/__init__.py deleted file mode 100644 index acd3013db4..0000000000 --- a/python/paddle/v2/plot/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from plot import Ploter - -__all__ = ['Ploter'] diff --git a/python/paddle/v2/plot/plot.py b/python/paddle/v2/plot/plot.py deleted file mode 100644 index c18e63dd5f..0000000000 --- a/python/paddle/v2/plot/plot.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - - -class PlotData(object): - def __init__(self): - self.step = [] - self.value = [] - - def append(self, step, value): - self.step.append(step) - self.value.append(value) - - def reset(self): - self.step = [] - self.value = [] - - -class Ploter(object): - def __init__(self, *args): - self.__args__ = args - self.__plot_data__ = {} - for title in args: - self.__plot_data__[title] = PlotData() - # demo in notebooks will use Ploter to plot figure, but when we convert - # the ipydb to py file for testing, the import of matplotlib will make the - # script crash. So we can use `export DISABLE_PLOT=True` to disable import - # these libs - self.__disable_plot__ = os.environ.get("DISABLE_PLOT") - if not self.__plot_is_disabled__(): - import matplotlib.pyplot as plt - from IPython import display - self.plt = plt - self.display = display - - def __plot_is_disabled__(self): - return self.__disable_plot__ == "True" - - def append(self, title, step, value): - assert isinstance(title, basestring) - assert self.__plot_data__.has_key(title) - data = self.__plot_data__[title] - assert isinstance(data, PlotData) - data.append(step, value) - - def plot(self, path=None): - if self.__plot_is_disabled__(): - return - - titles = [] - for title in self.__args__: - data = self.__plot_data__[title] - assert isinstance(data, PlotData) - if len(data.step) > 0: - titles.append(title) - self.plt.plot(data.step, data.value) - self.plt.legend(titles, loc='upper left') - if path is None: - self.display.clear_output(wait=True) - self.display.display(self.plt.gcf()) - else: - self.plt.savefig(path) - self.plt.gcf().clear() - - def reset(self): - for key in self.__plot_data__: - data = self.__plot_data__[key] - assert isinstance(data, PlotData) - data.reset() diff --git a/python/paddle/v2/plot/tests/CMakeLists.txt b/python/paddle/v2/plot/tests/CMakeLists.txt deleted file mode 100644 index 4b6c1c8096..0000000000 --- a/python/paddle/v2/plot/tests/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -if (NOT APPLE) - # The Mac OS X backend will not be able to function correctly if Python is - # not installed as a framework. - py_test(test_ploter SRCS test_ploter.py) -endif() diff --git a/python/paddle/v2/plot/tests/__init__.py b/python/paddle/v2/plot/tests/__init__.py deleted file mode 100644 index d1abfc08f1..0000000000 --- a/python/paddle/v2/plot/tests/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import test_ploter - -__all__ = ['test_ploter.py'] diff --git a/python/paddle/v2/plot/tests/test_ploter.py b/python/paddle/v2/plot/tests/test_ploter.py deleted file mode 100644 index a75f853ed9..0000000000 --- a/python/paddle/v2/plot/tests/test_ploter.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
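Typical use of the `Ploter` above; the titles and training loop are illustrative, and `run_one_step` is hypothetical:

.. code-block:: python

    # Sketch: Ploter is the class defined above.
    from paddle.v2.plot import Ploter

    ploter = Ploter("train_cost", "test_cost")
    for step in range(100):
        cost = run_one_step()              # hypothetical training step
        ploter.append("train_cost", step, cost)
        if step % 10 == 0:
            ploter.plot()                  # redraw inline (IPython display)
    ploter.plot("cost.png")                # or save the figure to a file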
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from paddle.v2.plot import Ploter - - -class TestCommon(unittest.TestCase): - def test_append(self): - title1 = "title1" - title2 = "title2" - plot_test = Ploter(title1, title2) - plot_test.append(title1, 1, 2) - plot_test.append(title1, 2, 5) - plot_test.append(title2, 3, 4) - self.assertEqual(plot_test.__plot_data__[title1].step, [1, 2]) - self.assertEqual(plot_test.__plot_data__[title1].value, [2, 5]) - self.assertEqual(plot_test.__plot_data__[title2].step, [3]) - self.assertEqual(plot_test.__plot_data__[title2].value, [4]) - plot_test.reset() - self.assertEqual(plot_test.__plot_data__[title1].step, []) - self.assertEqual(plot_test.__plot_data__[title1].value, []) - self.assertEqual(plot_test.__plot_data__[title2].step, []) - self.assertEqual(plot_test.__plot_data__[title2].value, []) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/pooling.py b/python/paddle/v2/pooling.py deleted file mode 100644 index 4881c27d1d..0000000000 --- a/python/paddle/v2/pooling.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer_config_helpers.poolings -import copy - -__all__ = [] -suffix = 'Pooling' - -for name in paddle.trainer_config_helpers.poolings.__all__: - new_name = name[:-len(suffix)] - globals()[new_name] = copy.copy( - getattr(paddle.trainer_config_helpers.poolings, name)) - globals()[new_name].__name__ = new_name - __all__.append(new_name) diff --git a/python/paddle/v2/reader/__init__.py b/python/paddle/v2/reader/__init__.py deleted file mode 100644 index 12efdc4a0f..0000000000 --- a/python/paddle/v2/reader/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -At training and testing time, PaddlePaddle programs need to read data. To ease -the users' work to write data reading code, we define that - -- A *reader* is a function that reads data (from file, network, random number - generator, etc) and yields data items. 
-- A *reader creator* is a function that returns a reader function. -- A *reader decorator* is a function, which accepts one or more readers, and - returns a reader. -- A *batch reader* is a function that reads data (from *reader*, file, network, - random number generator, etc) and yields a batch of data items. - -##################### -Data Reader Interface -##################### - -Indeed, *data reader* doesn't have to be a function that reads and yields data -items. It can be any function with no parameter that creates a iterable -(anything can be used in :code:`for x in iterable`)\: - -.. code-block:: python - - iterable = data_reader() - -Element produced from the iterable should be a **single** entry of data, -**not** a mini batch. That entry of data could be a single item, or a tuple of -items. -Item should be of `supported type `_ (e.g., numpy 1d -array of float32, int, list of int) - -An example implementation for single item data reader creator: - -.. code-block:: python - - def reader_creator_random_image(width, height): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height) - return reader - -An example implementation for multiple item data reader creator: - -.. code-block:: python - - def reader_creator_random_image_and_label(width, height, label): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height), label - return reader - - -TODO(yuyang18): Should we add whole design doc here? -""" - -import decorator -from decorator import * - -import creator - -__all__ = decorator.__all__ + ['creator'] diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py deleted file mode 100644 index fda5246d74..0000000000 --- a/python/paddle/v2/reader/creator.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Creator package contains some simple reader creator, which could -be used in user program. -""" - -__all__ = ['np_array', 'text_file', 'recordio', 'cloud_reader'] - - -def np_array(x): - """ - Creates a reader that yields elements of x, if it is a - numpy vector. Or rows of x, if it is a numpy matrix. - Or any sub-hyperplane indexed by the highest dimension. - - :param x: the numpy array to create reader from. - :returns: data reader created from x. - """ - - def reader(): - if x.ndim < 1: - yield x - - for e in x: - yield e - - return reader - - -def text_file(path): - """ - Creates a data reader that outputs text line by line from given text file. - Trailing new line ('\\\\n') of each line will be removed. - - :path: path of the text file. - :returns: data reader of text file - """ - - def reader(): - f = open(path, "r") - for l in f: - yield l.rstrip('\n') - f.close() - - return reader - - -def recordio(paths, buf_size=100): - """ - Creates a data reader from given RecordIO file paths separated by ",", - glob pattern is supported. - :path: path of recordio files, can be a string or a string list. 
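Two of the creators above in action; the text file path is a placeholder:

.. code-block:: python

    # Sketch: np_array and text_file are the creators defined above.
    import numpy as np
    import paddle.v2.reader.creator as creator

    rows = creator.np_array(np.array([[1, 2, 3], [4, 5, 6]], np.int32))
    for row in rows():
        print row       # [1 2 3] then [4 5 6]

    lines = creator.text_file("train.txt")
    for line in lines():
        print line      # each line with its trailing '\n' stripped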
- :returns: data reader of recordio files. - """ - - import recordio as rec - import paddle.v2.reader.decorator as dec - import cPickle as pickle - - def reader(): - if isinstance(paths, basestring): - path = paths - else: - path = ",".join(paths) - f = rec.reader(path) - while True: - r = f.read() - if r is None: - break - yield pickle.loads(r) - f.close() - - return dec.buffered(reader, buf_size) - - -pass_num = 0 - - -def cloud_reader(paths, etcd_endpoints, timeout_sec=5, buf_size=64): - """ - Create a data reader that yield a record one by one from - the paths: - :paths: path of recordio files, can be a string or a string list. - :etcd_endpoints: the endpoints for etcd cluster - :returns: data reader of recordio files. - - .. code-block:: python - from paddle.v2.reader.creator import cloud_reader - etcd_endpoints = "http://127.0.0.1:2379" - trainer.train.( - reader=cloud_reader(["/work/dataset/uci_housing/uci_housing*"], etcd_endpoints), - ) - """ - import os - import cPickle as pickle - import paddle.v2.master as master - c = master.client(etcd_endpoints, timeout_sec, buf_size) - - if isinstance(paths, basestring): - path = [paths] - else: - path = paths - c.set_dataset(path) - - def reader(): - global pass_num - c.paddle_start_get_records(pass_num) - pass_num += 1 - - while True: - r, e = c.next_record() - if not r: - if e != -2: - print "get record error: ", e - break - yield pickle.loads(r) - - return reader diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py deleted file mode 100644 index 44a6e34463..0000000000 --- a/python/paddle/v2/reader/decorator.py +++ /dev/null @@ -1,405 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = [ - 'map_readers', 'buffered', 'compose', 'chain', 'shuffle', - 'ComposeNotAligned', 'firstn', 'xmap_readers', 'PipeReader' -] - -from threading import Thread -import subprocess - -from Queue import Queue -import itertools -import random -import zlib - - -def map_readers(func, *readers): - """ - Creates a data reader that outputs return value of function using - output of each data readers as arguments. - - :param func: function to use. The type of func should be (Sample) => Sample - :type: callable - :param readers: readers whose outputs will be used as arguments of func. - :return: the created data reader. - :rtype: callable - """ - - def reader(): - rs = [] - for r in readers: - rs.append(r()) - for e in itertools.imap(func, *rs): - yield e - - return reader - - -def shuffle(reader, buf_size): - """ - Creates a data reader whose data output is shuffled. - - Output from the iterator that created by original reader will be - buffered into shuffle buffer, and then shuffled. The size of shuffle buffer - is determined by argument buf_size. - - :param reader: the original reader whose output will be shuffled. - :type reader: callable - :param buf_size: shuffle buffer size. - :type buf_size: int - - :return: the new reader whose output is shuffled. 
- :rtype: callable - """ - - def data_reader(): - buf = [] - for e in reader(): - buf.append(e) - if len(buf) >= buf_size: - random.shuffle(buf) - for b in buf: - yield b - buf = [] - - if len(buf) > 0: - random.shuffle(buf) - for b in buf: - yield b - - return data_reader - - -def chain(*readers): - """ - Creates a data reader whose output is the outputs of input data - readers chained together. - - If input readers output following data entries: - [0, 0, 0] - [1, 1, 1] - [2, 2, 2] - The chained reader will output: - [0, 0, 0, 1, 1, 1, 2, 2, 2] - - :param readers: input readers. - :return: the new data reader. - :rtype: callable - """ - - def reader(): - rs = [] - for r in readers: - rs.append(r()) - - for e in itertools.chain(*rs): - yield e - - return reader - - -class ComposeNotAligned(ValueError): - pass - - -def compose(*readers, **kwargs): - """ - Creates a data reader whose output is the combination of input readers. - - If input readers output following data entries: - (1, 2) 3 (4, 5) - The composed reader will output: - (1, 2, 3, 4, 5) - - :param readers: readers that will be composed together. - :param check_alignment: if True, will check if input readers are aligned - correctly. If False, will not check alignment and trailing outputs - will be discarded. Defaults to True. - :type check_alignment: bool - - :return: the new data reader. - - :raises ComposeNotAligned: outputs of readers are not aligned. - Will not raise when check_alignment is set to False. - """ - check_alignment = kwargs.pop('check_alignment', True) - - def make_tuple(x): - if isinstance(x, tuple): - return x - else: - return (x, ) - - def reader(): - rs = [] - for r in readers: - rs.append(r()) - if not check_alignment: - for outputs in itertools.izip(*rs): - yield sum(map(make_tuple, outputs), ()) - else: - for outputs in itertools.izip_longest(*rs): - for o in outputs: - if o is None: - # None will be not be present if compose is aligned - raise ComposeNotAligned( - "outputs of readers are not aligned.") - yield sum(map(make_tuple, outputs), ()) - - return reader - - -def buffered(reader, size): - """ - Creates a buffered data reader. - - The buffered data reader will read and save data entries into a - buffer. Reading from the buffered data reader will proceed as long - as the buffer is not empty. - - :param reader: the data reader to read from. - :type reader: callable - :param size: max buffer size. - :type size: int - - :returns: the buffered data reader. - """ - - class EndSignal(): - pass - - end = EndSignal() - - def read_worker(r, q): - for d in r: - q.put(d) - q.put(end) - - def data_reader(): - r = reader() - q = Queue(maxsize=size) - t = Thread( - target=read_worker, args=( - r, - q, )) - t.daemon = True - t.start() - e = q.get() - while e != end: - yield e - e = q.get() - - return data_reader - - -def firstn(reader, n): - """ - Limit the max number of samples that reader could return. - - :param reader: the data reader to read from. - :type reader: callable - :param n: the max number of samples that return. - :type n: int - :return: the decorated reader. - :rtype: callable - """ - - # TODO(yuyang18): Check if just drop the reader, could clean the opened - # resource or not? - - def firstn_reader(): - for i, item in enumerate(reader()): - if i == n: - break - yield item - - return firstn_reader - - -class XmapEndSignal(): - pass - - -def xmap_readers(mapper, reader, process_num, buffer_size, order=False): - """ - Use multiprocess to map samples from reader by a mapper defined by user. 
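Composing the decorators above; `my_reader` stands in for any single-item reader creator:

.. code-block:: python

    # Sketch: buffered, shuffle and firstn are the decorators defined above.
    import paddle.v2.reader

    def my_reader():
        for i in range(1000):
            yield i

    r = paddle.v2.reader.firstn(
        paddle.v2.reader.shuffle(
            paddle.v2.reader.buffered(my_reader, size=128), buf_size=512),
        n=100)
    for item in r():
        pass    # yields at most 100 items, shuffled in 512-item windows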
- And this function contains a buffered decorator. - :param mapper: a function to map sample. - :type mapper: callable - :param reader: the data reader to read from - :type reader: callable - :param process_num: process number to handle original sample - :type process_num: int - :param buffer_size: max buffer size - :type buffer_size: int - :param order: keep the order of reader - :type order: bool - :return: the decarated reader - :rtype: callable - """ - end = XmapEndSignal() - - # define a worker to read samples from reader to in_queue - def read_worker(reader, in_queue): - for i in reader(): - in_queue.put(i) - in_queue.put(end) - - # define a worker to read samples from reader to in_queue with order flag - def order_read_worker(reader, in_queue): - in_order = 0 - for i in reader(): - in_queue.put((in_order, i)) - in_order += 1 - in_queue.put(end) - - # define a worker to handle samples from in_queue by mapper - # and put mapped samples into out_queue - def handle_worker(in_queue, out_queue, mapper): - sample = in_queue.get() - while not isinstance(sample, XmapEndSignal): - r = mapper(sample) - out_queue.put(r) - sample = in_queue.get() - in_queue.put(end) - out_queue.put(end) - - # define a worker to handle samples from in_queue by mapper - # and put mapped samples into out_queue by order - def order_handle_worker(in_queue, out_queue, mapper, out_order): - ins = in_queue.get() - while not isinstance(ins, XmapEndSignal): - order, sample = ins - r = mapper(sample) - while order != out_order[0]: - pass - out_queue.put(r) - out_order[0] += 1 - ins = in_queue.get() - in_queue.put(end) - out_queue.put(end) - - def xreader(): - in_queue = Queue(buffer_size) - out_queue = Queue(buffer_size) - out_order = [0] - # start a read worker in a thread - target = order_read_worker if order else read_worker - t = Thread(target=target, args=(reader, in_queue)) - t.daemon = True - t.start() - # start several handle_workers - target = order_handle_worker if order else handle_worker - args = (in_queue, out_queue, mapper, out_order) if order else ( - in_queue, out_queue, mapper) - workers = [] - for i in xrange(process_num): - worker = Thread(target=target, args=args) - worker.daemon = True - workers.append(worker) - for w in workers: - w.start() - - sample = out_queue.get() - while not isinstance(sample, XmapEndSignal): - yield sample - sample = out_queue.get() - finish = 1 - while finish < process_num: - sample = out_queue.get() - if isinstance(sample, XmapEndSignal): - finish += 1 - else: - yield sample - - return xreader - - -def _buf2lines(buf, line_break="\n"): - # FIXME: line_break should be automatically configured. - lines = buf.split(line_break) - return lines[:-1], lines[-1] - - -class PipeReader: - """ - PipeReader read data by stream from a command, take it's - stdout into a pipe buffer and redirect it to the parser to - parse, then yield data as your desired format. - - You can using standard linux command or call another program - to read data, from HDFS, Ceph, URL, AWS S3 etc: - - .. code-block:: python - cmd = "hadoop fs -cat /path/to/some/file" - cmd = "cat sample_file.tar.gz" - cmd = "curl http://someurl" - cmd = "python print_s3_bucket.py" - - An example: - - .. 
-
-
-def _buf2lines(buf, line_break="\n"):
-    # FIXME: line_break should be automatically configured.
-    lines = buf.split(line_break)
-    return lines[:-1], lines[-1]
-
-
-class PipeReader:
-    """
-    PipeReader reads data streamed from a command: it takes the command's
-    stdout into a pipe buffer, redirects it to the parser to parse, and
-    then yields data in the desired format.
-
-    You can use a standard Linux command or call another program to read
-    data, e.g. from HDFS, Ceph, a URL, or AWS S3:
-
-    .. code-block:: python
-
-       cmd = "hadoop fs -cat /path/to/some/file"
-       cmd = "cat sample_file.tar.gz"
-       cmd = "curl http://someurl"
-       cmd = "python print_s3_bucket.py"
-
-    An example:
-
-    .. code-block:: python
-
-       def example_reader():
-           for f in myfiles:
-               pr = PipeReader("cat %s" % f)
-               for l in pr.get_line():
-                   sample = l.split(" ")
-                   yield sample
-    """
-
-    def __init__(self, command, bufsize=8192, file_type="plain"):
-        if not isinstance(command, str):
-            raise TypeError("command must be a string")
-        if file_type == "gzip":
-            self.dec = zlib.decompressobj(
-                32 + zlib.MAX_WBITS)  # offset 32 to skip the header
-        self.file_type = file_type
-        self.bufsize = bufsize
-        self.process = subprocess.Popen(
-            command.split(" "), bufsize=bufsize, stdout=subprocess.PIPE)
-
-    def get_line(self, cut_lines=True, line_break="\n"):
-        """
-        :param cut_lines: cut the buffer into lines.
-        :type cut_lines: bool
-        :param line_break: line break of the file, like \n or \r
-        :type line_break: string
-
-        :return: one line or a buffer of bytes
-        :rtype: string
-        """
-        remained = ""
-        while True:
-            buff = self.process.stdout.read(self.bufsize)
-            if buff:
-                if self.file_type == "gzip":
-                    decomp_buff = self.dec.decompress(buff)
-                elif self.file_type == "plain":
-                    decomp_buff = buff
-                else:
-                    raise TypeError("file_type %s is not allowed" %
-                                    self.file_type)
-
-                if cut_lines:
-                    lines, remained = _buf2lines(''.join(
-                        [remained, decomp_buff]), line_break)
-                    for line in lines:
-                        yield line
-                else:
-                    yield decomp_buff
-            else:
-                break
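For compressed input, the same class can stream a gzip file; a hedged sketch (the path is hypothetical, and it relies on the gzip branch above, which decompresses with a 32 + MAX_WBITS window so the gzip header is skipped):

.. code-block:: python

    def gz_line_reader():
        # stream a gzip-compressed text file without unpacking it to disk
        pr = PipeReader("cat /path/to/data.txt.gz", file_type="gzip")
        for line in pr.get_line():
            yield line.split("\t")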
diff --git a/python/paddle/v2/reader/tests/CMakeLists.txt b/python/paddle/v2/reader/tests/CMakeLists.txt
deleted file mode 100644
index 107d5912e1..0000000000
--- a/python/paddle/v2/reader/tests/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-py_test(creator_test SRCS creator_test.py)
-py_test(decorator_test SRCS decorator_test.py)
diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/v2/reader/tests/__init__.py
deleted file mode 100644
index eca2dce114..0000000000
--- a/python/paddle/v2/reader/tests/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py
deleted file mode 100644
index 7fe374e663..0000000000
--- a/python/paddle/v2/reader/tests/creator_test.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Copyright PaddlePaddle contributors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-import unittest
-import numpy as np
-import paddle.v2.reader.creator
-
-
-class TestNumpyArray(unittest.TestCase):
-    def test_numpy_array(self):
-        l = [[1, 2, 3], [4, 5, 6]]
-        x = np.array(l, np.int32)
-        reader = paddle.v2.reader.creator.np_array(x)
-        for idx, e in enumerate(reader()):
-            self.assertItemsEqual(e, l[idx])
-
-
-class TestTextFile(unittest.TestCase):
-    def test_text_file(self):
-        path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
-        reader = paddle.v2.reader.creator.text_file(path)
-        for idx, e in enumerate(reader()):
-            self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))
-
-
-class TestRecordIO(unittest.TestCase):
-    def do_test(self, path):
-        reader = paddle.v2.reader.creator.recordio(path)
-        idx = 0
-        for e in reader():
-            if idx == 0:
-                self.assertEqual(e, (1, 2, 3))
-            elif idx == 1:
-                self.assertEqual(e, (4, 5, 6))
-            idx += 1
-        self.assertEqual(idx, 2)
-
-    def test_recordIO(self):
-        self.do_test(
-            os.path.join(
-                os.path.dirname(__file__), "test_reader_recordio.dat"))
-        self.do_test([
-            os.path.join(
-                os.path.dirname(__file__), "test_reader_recordio.dat")
-        ])
-
-
-if __name__ == '__main__':
-    unittest.main()
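The creators exercised by this test live in paddle.v2.reader.creator, which is removed elsewhere in this patch and whose bodies are not shown here. As a rough sketch reconstructed from the test expectations only (not the verbatim deleted source), np_array and text_file plausibly behave like:

.. code-block:: python

    import numpy as np

    def np_array(x):
        # one sample per row of the array
        def reader():
            for row in x:
                yield row
        return reader

    def text_file(path):
        # one sample per line, with the trailing newline stripped
        def reader():
            with open(path, 'r') as f:
                for line in f:
                    yield line.rstrip('\n')
        return reader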
diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py
deleted file mode 100644
index 6b680e39f3..0000000000
--- a/python/paddle/v2/reader/tests/decorator_test.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import time
-import unittest
-
-import paddle.v2.reader
-
-
-def reader_creator_10(dur):
-    def reader():
-        for i in range(10):
-            # this sleep helps test paddle.v2.reader.buffered
-            time.sleep(dur)
-            yield i
-
-    return reader
-
-
-class TestMap(unittest.TestCase):
-    def test_map(self):
-        d = {"h": 0, "i": 1}
-
-        def tokenize(x):
-            return d[x]
-
-        def read():
-            yield "h"
-            yield "i"
-
-        r = paddle.v2.reader.map_readers(tokenize, read)
-        for i, e in enumerate(r()):
-            self.assertEqual(e, i)
-
-
-class TestBuffered(unittest.TestCase):
-    def test_read(self):
-        for size in range(20):
-            b = paddle.v2.reader.buffered(reader_creator_10(0), size)
-            c = 0
-            for i in b():
-                self.assertEqual(i, c)
-                c += 1
-            self.assertEqual(c, 10)
-
-    def test_buffering(self):
-        # each read has a 30ms delay
-        b = paddle.v2.reader.buffered(reader_creator_10(0.03), 10)
-        last_time = time.time()
-        for idx, i in enumerate(b()):
-            elapsed_time = time.time() - last_time
-            if i == 0:
-                time.sleep(0.3)
-            else:
-                # read time should be short, meaning it was already buffered
-                self.assertLess(elapsed_time, 0.05)
-            last_time = time.time()
-
-
-class TestCompose(unittest.TestCase):
-    def test_compose(self):
-        reader = paddle.v2.reader.compose(
-            reader_creator_10(0), reader_creator_10(0))
-        for idx, e in enumerate(reader()):
-            self.assertEqual(e, (idx, idx))
-
-    def test_compose_not_aligned(self):
-        total = 0
-        reader = paddle.v2.reader.compose(
-            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
-            reader_creator_10(0))
-        with self.assertRaises(paddle.v2.reader.ComposeNotAligned):
-            for e in reader():
-                total += 1
-        # expecting 10, not 20
-        self.assertEqual(total, 10)
-
-    def test_compose_not_aligned_no_check(self):
-        total = 0
-        reader = paddle.v2.reader.compose(
-            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
-            reader_creator_10(0),
-            check_alignment=False)
-        for e in reader():
-            total += 1
-        # expecting 10, not 20
-        self.assertEqual(total, 10)
-
-
-class TestChain(unittest.TestCase):
-    def test_chain(self):
-        c = paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0))
-        idx = 0
-        for e in c():
-            self.assertEqual(e, idx % 10)
-            idx += 1
-        self.assertEqual(idx, 20)
-
-
-class TestShuffle(unittest.TestCase):
-    def test_shuffle(self):
-        case = [(0, True), (1, True), (10, False), (100, False)]
-        a = reader_creator_10(0)
-        for size, checkEq in case:
-            s = paddle.v2.reader.shuffle(a, size)
-            total = 0
-            for idx, e in enumerate(s()):
-                if checkEq:
-                    self.assertEqual(idx, e)
-                total += 1
-            self.assertEqual(total, 10)
-
-
-class TestXmap(unittest.TestCase):
-    def test_xmap(self):
-        def mapper(x):
-            return (x + 1)
-
-        orders = (True, False)
-        thread_nums = (1, 2, 4, 8, 16)
-        buffered_size = (1, 2, 4, 8, 16)
-        for order in orders:
-            for tNum in thread_nums:
-                for size in buffered_size:
-                    reader = paddle.v2.reader.xmap_readers(mapper,
-                                                           reader_creator_10(0),
-                                                           tNum, size, order)
-                    for n in xrange(3):
-                        result = []
-                        for i in reader():
-                            result.append(i)
-                        if not order:
-                            result.sort()
-                        for idx, e in enumerate(result):
-                            self.assertEqual(e, mapper(idx))
-
-
-class TestPipeReader(unittest.TestCase):
-    def test_pipe_reader(self):
-        def example_reader(myfiles):
-            for f in myfiles:
-                pr = paddle.v2.reader.PipeReader("cat %s" % f, bufsize=128)
-                for l in pr.get_line():
-                    yield l
-
-        import tempfile
-
-        records = [str(i) for i in xrange(5)]
-        temp = tempfile.NamedTemporaryFile()
-        try:
-            with open(temp.name, 'w') as f:
-                for r in records:
-                    f.write('%s\n' % r)
-
-            result = []
-            for r in example_reader([temp.name]):
-                result.append(r)
-
-            for idx, e in enumerate(records):
-                self.assertEqual(e, result[idx])
-        finally:
-            # delete the temporary file
-            temp.close()
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/v2/reader/tests/test_data_creator.txt b/python/paddle/v2/reader/tests/test_data_creator.txt
deleted file mode 100644
index a2a8d47d43..0000000000
--- a/python/paddle/v2/reader/tests/test_data_creator.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-0 1
-2 3
-4 5
diff --git a/python/paddle/v2/reader/tests/test_reader_recordio.dat b/python/paddle/v2/reader/tests/test_reader_recordio.dat
deleted file mode 100644
index a99a35bb829e066c4845d0b85b96cd1eb3a12491..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 76
[binary patch data omitted]
diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat
deleted file mode 100644
index 17aa89b6796184407e83246d3f342a55a66b4a69..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 88
[binary patch data omitted]
diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py
deleted file mode 100644
index 63905c04cf..0000000000
--- a/python/paddle/v2/tests/test_data_feeder.py
+++ /dev/null
@@ -1,267 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import py_paddle.swig_paddle as api
-import numpy as np
-
-from paddle.v2 import data_type
-from paddle.v2.data_feeder import DataFeeder
-
-
-class DataFeederTest(unittest.TestCase):
-    def dense_reader(self, size):
-        data = np.random.random(size)
-        return data
-
-    def sparse_binary_reader(self, high, size_limit, non_empty=False):
-        num = np.random.randint(size_limit)  # num could be 0
-        while non_empty and num == 0:
-            num = np.random.randint(size_limit)
-        return np.random.randint(high, size=num).tolist()
-
-    def test_dense(self):
-        def compare(input):
-            feeder = DataFeeder([('image', data_type.dense_vector(784))],
-                                {'image': 0})
-            arg = feeder(input)
-            output = arg.getSlotValue(0).copyToNumpyMat()
-            input = np.array(input, dtype='float32')
-            self.assertAlmostEqual(input.all(), output.all())
-
-        # test numpy array
-        batch_size = 32
-        dim = 784
-        data = []
-        for i in xrange(batch_size):
-            each_sample = []
-            each_sample.append(self.dense_reader(dim))
-            data.append(each_sample)
-        compare(data)
-
-        # each feature is a list
-        data = []
-        for i in xrange(batch_size):
-            each_sample = []
-            each_sample.append(self.dense_reader(dim).tolist())
-            data.append(each_sample)
-        compare(data)
-
-        # test tuple
-        data = []
-        for i in xrange(batch_size):
-            each_sample = (self.dense_reader(dim).tolist(), )
-            data.append(each_sample)
-        compare(data)
-
-    def test_sparse_binary(self):
-        dim = 10000
-        batch_size = 32
-        data = []
-        for i in xrange(batch_size):
-            each_sample = []
-            each_sample.append(self.sparse_binary_reader(dim, 50))
-            data.append(each_sample)
-        feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))],
-                            {'input': 0})
-        arg = feeder(data)
-        output = arg.getSlotValue(0)
-        assert isinstance(output, api.Matrix)
-        for i in xrange(batch_size):
-            self.assertEqual(output.getSparseRowCols(i), data[i][0])
-
-    def test_sparse(self):
-        dim = 10000
-        batch_size = 32
-        v = []
-        w = []
-        data = []
-        for dat in xrange(batch_size):
-            each_sample = []
-            a = self.sparse_binary_reader(dim, 40, non_empty=True)
-            b = self.dense_reader(len(a)).tolist()
-            v.append(a)
-            w.append(np.array(b, dtype="float32"))
-            each_sample.append(zip(a, b))
-            data.append(each_sample)
-
-        feeder = DataFeeder([('input', data_type.sparse_float_vector(dim))],
-                            {'input': 0})
-        arg = feeder(data)
-        output = arg.getSlotValue(0)
-        assert isinstance(output, api.Matrix)
-        for i in xrange(batch_size):
-            self.assertEqual(output.getSparseRowCols(i), v[i])
-            cols_value = output.getSparseRowColsVal(i)
-            value = [val[1] for val in cols_value]
-            value = np.array(value, dtype="float32")
-            self.assertAlmostEqual(value.all(), w[i].all())
-
-    def test_integer(self):
-        value_range = 100
-        batch_size = 32
-        index = []
-        for i in xrange(batch_size):
-            each_sample = []
-            each_sample.append(np.random.randint(value_range))
-            index.append(each_sample)
-        feeder = DataFeeder([('input', data_type.integer_value(value_range))],
-                            {'input': 0})
-        arg = feeder(index)
-        output = arg.getSlotIds(0).copyToNumpyArray()
-        index = np.array(index, dtype='int')
-        self.assertEqual(output.all(), index.flatten().all())
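For reference, the per-sample layouts these tests feed in (the shapes follow the assertions above; the numbers are made up): each sample is a list or tuple with one entry per feature, and sparse features are either bare column ids or (id, value) pairs:

.. code-block:: python

    dense_sample = [[0.1, 0.2, 0.3]]        # dense_vector(3)
    binary_sample = [[2, 7, 11]]            # sparse_binary_vector: column ids
    float_sample = [[(2, 0.5), (7, 1.5)]]   # sparse_float_vector: (id, value)
    int_sample = [[3]]                      # integer_value: a single id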
output_data = arg.getSlotIds(0).copyToNumpyArray() - output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray() - - index = [] - for dat in data: - index.extend(x for x in dat[0]) # only one feature, so dat[0] - index = np.array(index, dtype='int') - start = np.array(start, dtype='int') - self.assertEqual(output_data.all(), index.all()) - self.assertEqual(output_start.all(), start.all()) - - def test_multiple_features(self): - batch_size = 2 - data = [] - for i in xrange(batch_size): - each_sample = [] - each_sample.append(np.random.randint(10)) - each_sample.append( - self.sparse_binary_reader( - 20000, 40, non_empty=True)) - each_sample.append(self.dense_reader(100)) - data.append(each_sample) - - # test multiple features - data_types = [('fea0', data_type.dense_vector(100)), - ('fea1', data_type.sparse_binary_vector(20000)), - ('fea2', data_type.integer_value(10))] - feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0}) - arg = feeder(data) - output_dense = arg.getSlotValue(0).copyToNumpyMat() - output_sparse = arg.getSlotValue(1) - output_index = arg.getSlotIds(2).copyToNumpyArray() - for i in xrange(batch_size): - self.assertEqual(output_dense[i].all(), data[i][2].all()) - self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1]) - self.assertEqual(output_index[i], data[i][0]) - - # reader returns 3 features, but only use 2 features - data_types = [('fea0', data_type.dense_vector(100)), - ('fea2', data_type.integer_value(10))] - feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0}) - arg = feeder(data) - output_dense = arg.getSlotValue(0).copyToNumpyMat() - output_index = arg.getSlotIds(1).copyToNumpyArray() - for i in xrange(batch_size): - self.assertEqual(output_dense[i].all(), data[i][2].all()) - self.assertEqual(output_index[i], data[i][0]) - - # reader returns 3 featreus, one is duplicate data - data_types = [('fea0', data_type.dense_vector(100)), - ('fea1', data_type.sparse_binary_vector(20000)), - ('fea2', data_type.integer_value(10)), - ('fea3', data_type.dense_vector(100))] - feeder = DataFeeder(data_types, - {'fea0': 2, - 'fea1': 1, - 'fea2': 0, - 'fea3': 2}) - arg = feeder(data) - fea0 = arg.getSlotValue(0).copyToNumpyMat() - fea1 = arg.getSlotValue(1) - fea2 = arg.getSlotIds(2).copyToNumpyArray() - fea3 = arg.getSlotValue(3).copyToNumpyMat() - for i in xrange(batch_size): - self.assertEqual(fea0[i].all(), data[i][2].all()) - self.assertEqual(fea1.getSparseRowCols(i), data[i][1]) - self.assertEqual(fea2[i], data[i][0]) - self.assertEqual(fea3[i].all(), data[i][2].all()) - - def test_multiple_features_tuple(self): - batch_size = 2 - data = [] - for i in xrange(batch_size): - a = np.random.randint(10) - b = self.sparse_binary_reader(20000, 40, non_empty=True) - c = self.dense_reader(100) - each_sample = (a, b, c) - data.append(each_sample) - - # test multiple features - data_types = [('fea0', data_type.dense_vector(100)), - ('fea1', data_type.sparse_binary_vector(20000)), - ('fea2', data_type.integer_value(10))] - feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0}) - arg = feeder(data) - out_dense = arg.getSlotValue(0).copyToNumpyMat() - out_sparse = arg.getSlotValue(1) - out_index = arg.getSlotIds(2).copyToNumpyArray() - for i in xrange(batch_size): - self.assertEqual(out_dense[i].all(), data[i][2].all()) - self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1]) - self.assertEqual(out_index[i], data[i][0]) - - def test_dense_set_shape(self): - # test 2-D data - def gen_data(batch_size, shape): - data = [] - for i in 
xrange(batch_size): - each_sample = [] - each_sample.append(np.random.random(shape)) - data.append(each_sample) - return data - - feeder = DataFeeder([('image', data_type.dense_array(2352))], - {'image': 0}) - arg = feeder(gen_data(32, (3, 28, 28))) - h = arg.getSlotFrameHeight(0) - w = arg.getSlotFrameWidth(0) - self.assertEqual(h, 28) - self.assertEqual(w, 28) - - arg = feeder(gen_data(32, (3, 30, 32))) - h = arg.getSlotFrameHeight(0) - w = arg.getSlotFrameWidth(0) - self.assertEqual(h, 30) - self.assertEqual(w, 32) - - -if __name__ == '__main__': - api.initPaddle("--use_gpu=0") - suite = unittest.TestLoader().loadTestsFromTestCase(DataFeederTest) - unittest.TextTestRunner().run(suite) - if api.isGpuVersion(): - api.setUseGpu(True) - unittest.main() diff --git a/python/paddle/v2/tests/test_image.py b/python/paddle/v2/tests/test_image.py deleted file mode 100644 index c78bbdc40a..0000000000 --- a/python/paddle/v2/tests/test_image.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import numpy as np - -import paddle.v2.image as image - - -class Image(unittest.TestCase): - def test_resize_flip_chw(self): - # resize - im = image.load_image('cat.jpg') - im = image.resize_short(im, 256) - self.assertEqual(256, min(im.shape[:2])) - self.assertEqual(3, im.shape[2]) - - # flip - im = image.left_right_flip(im) - im2 = np.flip(im, 1) - self.assertEqual(im.all(), im2.all()) - - # to_chw - h, w, c = im.shape - im = image.to_chw(im) - self.assertEqual(c, im.shape[0]) - self.assertEqual(h, im.shape[1]) - self.assertEqual(w, im.shape[2]) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py deleted file mode 100644 index b169a0f38e..0000000000 --- a/python/paddle/v2/tests/test_layer.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import paddle.v2.activation as activation -import paddle.v2.attr as attr -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -import paddle.v2.pooling as pooling -import paddle.v2.networks as networks -import paddle.v2.evaluator as evaluator - -pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) -label = layer.data(name='label', type=data_type.integer_value(10)) -weight = layer.data(name='weight', type=data_type.dense_vector(1)) -combine_weight = layer.data( - name='weight_combine', type=data_type.dense_vector(10)) -score = layer.data(name='score', type=data_type.dense_vector(1)) - -hidden = layer.fc(input=pixel, - size=100, - act=activation.Sigmoid(), - param_attr=attr.Param(name='hidden')) -inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) -conv = layer.img_conv( - input=pixel, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - act=activation.Linear()) - - -class ImageLayerTest(unittest.TestCase): - def test_conv_layer(self): - conv_shift = layer.conv_shift(a=pixel, b=score) - print layer.parse_network(conv, conv_shift) - - def test_pooling_layer(self): - maxpool = layer.img_pool( - input=conv, - pool_size=2, - num_channels=16, - padding=1, - pool_type=pooling.Max()) - spp = layer.spp(input=conv, - pyramid_height=2, - num_channels=16, - pool_type=pooling.Max()) - maxout = layer.maxout(input=conv, num_channels=16, groups=4) - print layer.parse_network([maxpool, spp, maxout]) - - def test_norm_layer(self): - norm1 = layer.img_cmrnorm(input=conv, size=5) - norm2 = layer.batch_norm(input=conv) - norm3 = layer.sum_to_one_norm(input=conv) - print layer.parse_network([norm1, norm2, norm3]) - - -class AggregateLayerTest(unittest.TestCase): - def test_aggregate_layer(self): - pool = layer.pooling( - input=pixel, - pooling_type=pooling.Avg(), - agg_level=layer.AggregateLevel.TO_SEQUENCE) - last_seq = layer.last_seq(input=pixel) - first_seq = layer.first_seq(input=pixel) - concat = layer.concat(input=[last_seq, first_seq]) - seq_concat = layer.seq_concat(a=last_seq, b=first_seq) - print layer.parse_network( - [pool, last_seq, first_seq, concat, seq_concat]) - - -class MathLayerTest(unittest.TestCase): - def test_math_layer(self): - addto = layer.addto(input=[pixel, pixel]) - linear_comb = layer.linear_comb( - weights=combine_weight, vectors=hidden, size=10) - interpolation = layer.interpolation( - input=[hidden, hidden], weight=score) - bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4) - power = layer.power(input=pixel, weight=score) - scaling = layer.scaling(input=pixel, weight=score) - slope = layer.slope_intercept(input=pixel) - tensor = layer.tensor(a=pixel, b=pixel, size=1000) - cos_sim = layer.cos_sim(a=pixel, b=pixel) - trans = layer.trans(input=tensor) - print layer.parse_network([ - addto, linear_comb, interpolation, power, scaling, slope, tensor, - cos_sim, trans - ]) - - -class ReshapeLayerTest(unittest.TestCase): - def test_reshape_layer(self): - block_expand = layer.block_expand( - input=conv, num_channels=4, stride_x=1, block_x=1) - expand = layer.expand( - input=weight, - expand_as=pixel, - expand_level=layer.ExpandLevel.FROM_NO_SEQUENCE) - repeat = layer.repeat(input=pixel, num_repeats=4) - reshape = layer.seq_reshape(input=pixel, reshape_size=4) - rotate = layer.rotate(input=pixel, height=16, width=49) - print layer.parse_network( - [block_expand, expand, repeat, reshape, rotate]) - - -class RecurrentLayerTest(unittest.TestCase): - def 
test_recurrent_layer(self): - word = layer.data(name='word', type=data_type.integer_value(12)) - recurrent = layer.recurrent(input=word) - lstm = layer.lstmemory(input=word) - gru = layer.grumemory(input=word) - print layer.parse_network([recurrent, lstm, gru]) - - -class CostLayerTest(unittest.TestCase): - def test_cost_layer(self): - cost1 = layer.classification_cost(input=inference, label=label) - cost2 = layer.classification_cost( - input=inference, label=label, weight=weight) - cost3 = layer.cross_entropy_cost(input=inference, label=label) - cost4 = layer.cross_entropy_with_selfnorm_cost( - input=inference, label=label) - cost5 = layer.square_error_cost(input=inference, label=label) - cost6 = layer.square_error_cost( - input=inference, label=label, weight=weight) - cost7 = layer.multi_binary_label_cross_entropy_cost( - input=inference, label=label) - cost8 = layer.rank_cost(left=score, right=score, label=score) - cost9 = layer.lambda_cost(input=inference, score=score) - cost10 = layer.sum_cost(input=inference) - cost11 = layer.huber_regression_cost(input=score, label=label) - cost12 = layer.huber_classification_cost(input=score, label=label) - - print layer.parse_network([cost1, cost2]) - print layer.parse_network([cost3, cost4]) - print layer.parse_network([cost5, cost6]) - print layer.parse_network([cost7, cost8, cost9, cost10, cost11, cost12]) - - crf = layer.crf(input=inference, label=label) - crf_decoding = layer.crf_decoding(input=inference, size=3) - ctc = layer.ctc(input=inference, label=label) - warp_ctc = layer.warp_ctc(input=pixel, label=label) - nce = layer.nce(input=inference, label=label, num_classes=3) - hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3) - - print layer.parse_network( - [crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid]) - - -class OtherLayerTest(unittest.TestCase): - def test_sampling_layer(self): - maxid = layer.max_id(input=inference) - sampling_id = layer.sampling_id(input=inference) - eos = layer.eos(input=maxid, eos_id=5) - layer.printer(maxid) - print layer.parse_network([maxid, sampling_id, eos]) - - def test_slicing_joining_layer(self): - pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) - print layer.parse_network(pad) - - -class ProjOpTest(unittest.TestCase): - def test_projection(self): - input = layer.data(name='data2', type=data_type.dense_vector(784)) - word = layer.data( - name='word2', type=data_type.integer_value_sequence(10000)) - fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid()) - fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid()) - mixed0 = layer.mixed( - size=256, - input=[ - layer.full_matrix_projection(input=fc0), - layer.full_matrix_projection(input=fc1) - ]) - with layer.mixed(size=200) as mixed1: - mixed1 += layer.full_matrix_projection(input=fc0) - mixed1 += layer.identity_projection(input=fc1) - - table = layer.table_projection(input=word) - emb0 = layer.mixed(size=512, input=table) - with layer.mixed(size=512) as emb1: - emb1 += table - - scale = layer.scaling_projection(input=fc0) - scale0 = layer.mixed(size=100, input=scale) - with layer.mixed(size=100) as scale1: - scale1 += scale - - dotmul = layer.dotmul_projection(input=fc0) - dotmul0 = layer.mixed(size=100, input=dotmul) - with layer.mixed(size=100) as dotmul1: - dotmul1 += dotmul - - context = layer.context_projection(input=fc0, context_len=5) - context0 = layer.mixed(size=500, input=context) - with layer.mixed(size=500) as context1: - context1 += context - - conv = layer.conv_projection( - 
input=input, - filter_size=1, - num_channels=1, - num_filters=128, - stride=1, - padding=0) - conv0 = layer.mixed(input=conv, bias_attr=True) - with layer.mixed(bias_attr=True) as conv1: - conv1 += conv - - print layer.parse_network(mixed0) - print layer.parse_network(mixed1) - print layer.parse_network(emb0) - print layer.parse_network(emb1) - print layer.parse_network(scale0) - print layer.parse_network(scale1) - print layer.parse_network(dotmul0) - print layer.parse_network(dotmul1) - print layer.parse_network(conv0) - print layer.parse_network(conv1) - - def test_operator(self): - ipt0 = layer.data(name='data1', type=data_type.dense_vector(784)) - ipt1 = layer.data(name='word1', type=data_type.dense_vector(128)) - fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) - fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) - - dotmul_op = layer.dotmul_operator(a=fc0, b=fc1) - dotmul0 = layer.mixed(input=dotmul_op) - with layer.mixed() as dotmul1: - dotmul1 += dotmul_op - - conv = layer.conv_operator( - img=ipt0, - filter=ipt1, - filter_size=1, - num_channels=1, - num_filters=128, - stride=1, - padding=0) - conv0 = layer.mixed(input=conv) - with layer.mixed() as conv1: - conv1 += conv - - print layer.parse_network(dotmul0) - print layer.parse_network(dotmul1) - print layer.parse_network(conv0) - print layer.parse_network(conv1) - - -class NetworkTests(unittest.TestCase): - def test_vgg(self): - img = layer.data(name='pixel1', type=data_type.dense_vector(784)) - vgg_out = networks.small_vgg( - input_image=img, num_channels=1, num_classes=2) - print layer.parse_network(vgg_out) - - -class EvaluatorTest(unittest.TestCase): - def test_evaluator(self): - img = layer.data(name='pixel2', type=data_type.dense_vector(784)) - output = layer.fc(input=img, - size=10, - act=activation.Softmax(), - name='fc_here') - lbl = layer.data(name='label2', type=data_type.integer_value(10)) - cost = layer.cross_entropy_cost(input=output, label=lbl) - - evaluator.classification_error(input=output, label=lbl) - print layer.parse_network(cost) - print layer.parse_network(output) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_op.py b/python/paddle/v2/tests/test_op.py deleted file mode 100644 index 15d5aef511..0000000000 --- a/python/paddle/v2/tests/test_op.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -import paddle.v2.op as op - - -class OpTest(unittest.TestCase): - def test_op(self): - x = layer.data(name='data', type=data_type.dense_vector(128)) - x = op.exp(x) - x = op.sqrt(x) - x = op.reciprocal(x) - x = op.log(x) - x = op.abs(x) - x = op.sigmoid(x) - x = op.tanh(x) - x = op.square(x) - x = op.relu(x) - y = 1 + x - y = y + 1 - y = x + y - y = y - x - y = y - 2 - y = 2 - y - y = 2 * y - y = y * 3 - z = layer.data(name='data_2', type=data_type.dense_vector(1)) - y = y * z - y = z * y - y = y + z - y = z + y - print layer.parse_network(y) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_paramconf_order.py b/python/paddle/v2/tests/test_paramconf_order.py deleted file mode 100644 index 264442be18..0000000000 --- a/python/paddle/v2/tests/test_paramconf_order.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright PaddlePaddle contributors. All Rights Reservedd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -import math -import paddle.v2 as paddle - - -def wordemb(inlayer): - wordemb = paddle.layer.table_projection( - input=inlayer, - size=5, - param_attr=paddle.attr.Param( - name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)) - return wordemb - - -def train(): - word_dict = paddle.dataset.imikolov.build_dict() - dict_size = len(word_dict) - # Every layer takes integer value of range [0, dict_size) - firstword = paddle.layer.data( - name="firstw", type=paddle.data_type.integer_value(dict_size)) - secondword = paddle.layer.data( - name="secondw", type=paddle.data_type.integer_value(dict_size)) - thirdword = paddle.layer.data( - name="thirdw", type=paddle.data_type.integer_value(dict_size)) - fourthword = paddle.layer.data( - name="fourthw", type=paddle.data_type.integer_value(dict_size)) - nextword = paddle.layer.data( - name="fifthw", type=paddle.data_type.integer_value(dict_size)) - - Efirst = wordemb(firstword) - Esecond = wordemb(secondword) - Ethird = wordemb(thirdword) - Efourth = wordemb(fourthword) - - contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth]) - hidden1 = paddle.layer.fc(name="fc1", - input=contextemb, - size=128, - act=paddle.activation.Sigmoid(), - layer_attr=paddle.attr.Extra(drop_rate=0.5), - bias_attr=paddle.attr.Param(learning_rate=2), - param_attr=paddle.attr.Param( - initial_std=1. 
/ math.sqrt(5 * 8), - learning_rate=1, - l2_rate=6e-4)) - predictword = paddle.layer.fc(input=hidden1, - size=dict_size, - bias_attr=paddle.attr.Param(learning_rate=2), - act=paddle.activation.Softmax()) - - return paddle.layer.classification_cost(input=predictword, label=nextword) - - -class TestParamConfOrder(unittest.TestCase): - def test_param_conf_order(self): - paddle.init() - cost = train() - parameters = paddle.parameters.create(cost) - adagrad = paddle.optimizer.AdaGrad( - learning_rate=3e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4)) - - trainer = paddle.trainer.SGD(cost, parameters, adagrad) - for p in trainer.get_topology_proto().parameters: - if p.name == "_fc1.w0": - self.assertEqual(p.decay_rate, 6e-4) - else: - self.assertEqual(p.decay_rate, 8e-4) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py deleted file mode 100644 index 3bfd9348a6..0000000000 --- a/python/paddle/v2/tests/test_parameters.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import sys - -try: - import py_paddle - - del py_paddle -except ImportError: - print >> sys.stderr, "It seems swig of Paddle is not installed, this " \ - "unittest will not be run." 
- sys.exit(0) - -import paddle.v2.parameters as parameters -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -from paddle.v2.attr import ParamAttr -from paddle.proto.ParameterConfig_pb2 import ParameterConfig -import random -import cStringIO -import numpy - - -def __rand_param_config__(name, psize=None): - conf = ParameterConfig() - conf.name = name - size = 1 - if psize is None: - for i in xrange(2): - dim = random.randint(1, 1000) - conf.dims.append(dim) - size *= dim - else: - size = psize - conf.size = size - assert conf.IsInitialized() - return conf - - -class TestParameters(unittest.TestCase): - def test_serialization(self): - params = parameters.Parameters() - params.__append_config__(__rand_param_config__("param_0")) - params.__append_config__(__rand_param_config__("param_1")) - - for name in params.names(): - param = params.get(name) - param[:] = numpy.random.uniform( - -1.0, 1.0, size=params.get_shape(name)) - params.set(name, param) - - tmp_file = cStringIO.StringIO() - params.to_tar(tmp_file) - tmp_file.seek(0) - params_dup = parameters.Parameters.from_tar(tmp_file) - - self.assertEqual(params_dup.names(), params.names()) - - for name in params.names(): - self.assertEqual(params.get_shape(name), params_dup.get_shape(name)) - p0 = params.get(name) - p1 = params_dup.get(name) - self.assertTrue(numpy.isclose(p0, p1).all()) - - def test_initializer(self): - def initializer(name): - assert name == "fc.w" - mat = numpy.ones((3, 2), dtype=numpy.float32) - mat[1, 1] = 2 - return mat - - x = layer.data(name="x", type=data_type.dense_vector(3)) - y = layer.fc(x, - size=2, - bias_attr=False, - param_attr=ParamAttr( - name="fc.w", initializer=initializer)) - params = parameters.create(y) - val = params["fc.w"] - assert val.shape == (3, 2) - expected = numpy.array([[1, 1], [1, 2], [1, 1]], numpy.float32) - assert numpy.logical_and.reduce(numpy.reshape(val == expected, 6)) - - def test_init_from_tar(self): - def get_param(names, size): - p = parameters.Parameters() - for k, v in zip(names, size): - p.__append_config__(__rand_param_config__(k, v)) - for name in p.names(): - param = p.get(name) - param[:] = numpy.random.uniform( - -1.0, 1.0, size=p.get_shape(name)) - p.set(name, param) - return p - - def get_parames(): - name1 = ['param_0', 'param_1'] - size1 = [128, 256] - p1 = get_param(name1, size1) - file1 = cStringIO.StringIO() - p1.to_tar(file1) - file1.seek(0) - - name2 = ['param_0', 'param_1', 'param_2'] - size2 = [128, 256, 288] - p2 = get_param(name2, size2) - file2 = cStringIO.StringIO() - p2.to_tar(file2) - file2.seek(0) - return p1, file1, p2, file2 - - p1, file1, p2, file2 = get_parames() - p2.init_from_tar(file1) - for name in p1.names(): - self.assertEqual(p1.get_shape(name), p2.get_shape(name)) - v1 = p1.get(name) - v2 = p2.get(name) - self.assertTrue(numpy.isclose(v1, v2).all()) - - p1, file1, p2, file2 = get_parames() - p1.init_from_tar(file2) - for name in p1.names(): - self.assertEqual(p1.get_shape(name), p2.get_shape(name)) - v1 = p1.get(name) - v2 = p2.get(name) - self.assertTrue(numpy.isclose(v1, v2).all()) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py deleted file mode 100644 index 6ad07167dc..0000000000 --- a/python/paddle/v2/tests/test_rnn_layer.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import difflib -import unittest - -import paddle.trainer_config_helpers as conf_helps -import paddle.v2.activation as activation -import paddle.v2.data_type as data_type -import paddle.v2.layer as layer -from paddle.trainer_config_helpers.config_parser_utils import \ - parse_network_config as parse_network -from paddle.trainer_config_helpers.config_parser_utils import \ - reset_parser - - -class RNNTest(unittest.TestCase): - def test_simple_rnn(self): - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - - def parse_old_rnn(): - reset_parser() - - def step(y): - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - def test(): - data = conf_helps.data_layer(name="word", size=dict_dim) - embd = conf_helps.embedding_layer(input=data, size=word_dim) - conf_helps.recurrent_group( - name="rnn", step=step, input=embd, reverse=True) - - return str(parse_network(test)) - - def parse_new_rnn(): - reset_parser() - - def new_step(y): - mem = layer.memory(name="rnn_state", size=hidden_dim) - out = layer.fc(input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - data = layer.data( - name="word", type=data_type.integer_value(dict_dim)) - embd = layer.embedding(input=data, size=word_dim) - rnn_layer = layer.recurrent_group( - name="rnn", step=new_step, input=embd, reverse=True) - return str(layer.parse_network(rnn_layer)) - - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) - - def test_sequence_rnn_multi_input(self): - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - label_dim = 3 - - def parse_old_rnn(): - reset_parser() - - def test(): - data = conf_helps.data_layer(name="word", size=dict_dim) - label = conf_helps.data_layer(name="label", size=label_dim) - emb = conf_helps.embedding_layer(input=data, size=word_dim) - boot_layer = conf_helps.data_layer(name="boot", size=10) - boot_layer = conf_helps.fc_layer( - name='boot_fc', input=boot_layer, size=10) - - def step(y, wid): - z = conf_helps.embedding_layer(input=wid, size=word_dim) - mem = conf_helps.memory( - name="rnn_state", - size=hidden_dim, - boot_layer=boot_layer) - out = conf_helps.fc_layer( - input=[y, z, mem], - size=hidden_dim, - act=conf_helps.TanhActivation(), - bias_attr=True, - name="rnn_state") - return out - - out = conf_helps.recurrent_group( - name="rnn", step=step, input=[emb, data]) - - rep = conf_helps.last_seq(input=out) - prob = conf_helps.fc_layer( - size=label_dim, - input=rep, - act=conf_helps.SoftmaxActivation(), - bias_attr=True) - - conf_helps.outputs( - conf_helps.classification_cost( - input=prob, label=label)) - - return str(parse_network(test)) - - def parse_new_rnn(): - reset_parser() - data = layer.data( - name="word", type=data_type.dense_vector(dict_dim)) - label = layer.data( - name="label", 
type=data_type.dense_vector(label_dim)) - emb = layer.embedding(input=data, size=word_dim) - boot_layer = layer.data( - name="boot", type=data_type.dense_vector(10)) - boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10) - - def step(y, wid): - z = layer.embedding(input=wid, size=word_dim) - mem = layer.memory( - name="rnn_state", size=hidden_dim, boot_layer=boot_layer) - out = layer.fc(input=[y, z, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - out = layer.recurrent_group( - name="rnn", step=step, input=[emb, data]) - - rep = layer.last_seq(input=out) - prob = layer.fc(size=label_dim, - input=rep, - act=activation.Softmax(), - bias_attr=True) - - cost = layer.classification_cost(input=prob, label=label) - - return str(layer.parse_network(cost)) - - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/tests/test_topology.py b/python/paddle/v2/tests/test_topology.py deleted file mode 100644 index bacd28ddb7..0000000000 --- a/python/paddle/v2/tests/test_topology.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -import paddle.v2.layer as layer -import paddle.v2.topology as topology -import paddle.v2.data_type as data_type -import paddle.trainer_config_helpers as conf_helps -import paddle.trainer.PyDataProvider2 as pydp2 - - -class TestTopology(unittest.TestCase): - def test_data_type(self): - pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) - label = layer.data(name='label', type=data_type.integer_value(10)) - hidden = layer.fc(input=pixel, - size=100, - act=conf_helps.SigmoidActivation()) - inference = layer.fc(input=hidden, - size=10, - act=conf_helps.SoftmaxActivation()) - cost = layer.classification_cost(input=inference, label=label) - topo = topology.Topology(cost) - data_types = topo.data_type() - self.assertEqual(len(data_types), 2) - pixel_data_type = filter(lambda type: type[0] == "pixel", data_types) - self.assertEqual(len(pixel_data_type), 1) - pixel_data_type = pixel_data_type[0] - self.assertEqual(pixel_data_type[1].type, pydp2.DataType.Dense) - self.assertEqual(pixel_data_type[1].dim, 784) - - label_data_type = filter(lambda type: type[0] == "label", data_types) - self.assertEqual(len(label_data_type), 1) - label_data_type = label_data_type[0] - self.assertEqual(label_data_type[1].type, pydp2.DataType.Index) - self.assertEqual(label_data_type[1].dim, 10) - - def test_get_layer(self): - pixel = layer.data(name='pixel2', type=data_type.dense_vector(784)) - label = layer.data(name='label2', type=data_type.integer_value(10)) - hidden = layer.fc(input=pixel, - size=100, - act=conf_helps.SigmoidActivation()) - inference = layer.fc(input=hidden, - size=10, - act=conf_helps.SoftmaxActivation()) - cost = layer.classification_cost(input=inference, label=label) - topo = topology.Topology(cost) - pixel_layer = topo.get_layer("pixel2") - label_layer = topo.get_layer("label2") - self.assertEqual(pixel_layer, pixel) - self.assertEqual(label_layer, label) - - def test_parse(self): - pixel = layer.data(name='pixel3', type=data_type.dense_vector(784)) - label = layer.data(name='label3', type=data_type.integer_value(10)) - hidden = layer.fc(input=pixel, - size=100, - act=conf_helps.SigmoidActivation()) - inference = layer.fc(input=hidden, - size=10, - act=conf_helps.SoftmaxActivation()) - maxid = layer.max_id(input=inference) - cost1 = layer.classification_cost(input=inference, label=label) - cost2 = layer.cross_entropy_cost(input=inference, label=label) - - topology.Topology(cost2).proto() - topology.Topology([cost1]).proto() - topology.Topology([cost1, cost2]).proto() - topology.Topology([inference, maxid]).proto() - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py deleted file mode 100644 index 923ccecb0b..0000000000 --- a/python/paddle/v2/topology.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import collections - -from paddle.proto.ModelConfig_pb2 import ModelConfig -import paddle.trainer_config_helpers as conf_helps -import layer as v2_layer -import config_base -import cPickle -from paddle.trainer import config_parser as cp - -__all__ = ['Topology'] - - -class Topology(object): - """ - Topology is used to store the information about all layers - and network configs. - """ - - def __init__(self, layers, extra_layers=None): - def __check__(layers): - if not isinstance(layers, collections.Sequence): - layers = [layers] - for layer in layers: - __check_layer_type__(layer) - return layers - - layers = __check__(layers) - self.layers = layers - if extra_layers is not None: - extra_layers = __check__(extra_layers) - - self.__model_config__ = v2_layer.parse_network( - layers, extra_layers=extra_layers) - - if extra_layers is not None: - self.layers.extend(extra_layers) - - assert isinstance(self.__model_config__, ModelConfig) - - def update_from_default(self): - # HACK(typhoonzero): update ParameterConfig(proto) in case of - # optimizers are defined after layers, or between layers. - # Must be called from trainer.__init__() - for parameter in self.__model_config__.parameters: - if parameter.momentum == 0.0 and cp.g_default_momentum: - parameter.momentum = cp.g_default_momentum - if parameter.decay_rate == 0.0 and cp.g_default_decay_rate: - parameter.decay_rate = cp.g_default_decay_rate - if parameter.initial_mean == 0.0: - parameter.initial_mean = cp.g_default_initial_mean - if parameter.initial_std == 0.01: - parameter.initial_std = cp.g_default_initial_std - if parameter.initial_strategy == 0: - parameter.initial_strategy = cp.g_default_initial_strategy - if parameter.initial_smart == False: - parameter.initial_smart = cp.g_default_initial_smart - if parameter.num_batches_regularization == 1 and \ - cp.g_default_num_batches_regularization: - parameter.num_batches_regularization = \ - cp.g_default_num_batches_regularization - if parameter.gradient_clipping_threshold == 0.0 and \ - cp.g_default_gradient_clipping_threshold: - parameter.gradient_clipping_threshold = \ - cp.g_default_gradient_clipping_threshold - if parameter.device == -1 and cp.g_default_device: - parameter.device = cp.g_default_device - # FIXME(typhoonzero): ignored: update_hooks, g_default_compact_func - - def use_sparse_updater(self): - """ - check if any parameter require to use sparse_update - :return: - """ - use_sparse = False - for parameter in self.__model_config__.parameters: - if parameter.sparse_update or parameter.sparse_remote_update: - use_sparse = True - break - return use_sparse - - def proto(self): - return self.__model_config__ - - def get_layer(self, name): - """ - get v2.Layer Class instance by layer name - :param name: - :return: - """ - return v2_layer.get_layer(name) - - def data_layers(self): - """ - get all data layer - :return: - """ - data_layers = {} - for layer in self.proto().layers: - l = v2_layer.get_layer(layer.name) - if l and l.layer_type == conf_helps.LayerType.DATA: - data_layers[layer.name] = l - return data_layers - - def data_type(self): - """ - get data_type from proto, such as: - [('image', dense_vector(768)), ('label', integer_value(10))] - """ - data_layers = self.data_layers() - - return [(nm, data_layers[nm].data_type) - for nm in self.proto().input_layer_names] - - def get_layer_proto(self, name): - for layer in self.__model_config__.layers: - if layer.name == name: - return layer - return None - - def serialize_for_inference(self, stream): - protobin = 
self.proto().SerializeToString() - data_type = self.data_type() - cPickle.dump({ - 'protobin': protobin, - 'data_type': data_type - }, stream, cPickle.HIGHEST_PROTOCOL) - - -def __check_layer_type__(layer): - if not isinstance(layer, config_base.Layer): - raise ValueError('layer should have type paddle.v2.config_base.Layer') diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py deleted file mode 100644 index 5d98d5b6db..0000000000 --- a/python/paddle/v2/trainer.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Module Trainer -""" -import collections -from topology import Topology -from . import event as v2_event -from . import optimizer as v2_optimizer -from . import parameters as v2_parameters - -__all__ = ['SGD'] - - -def default_event_handler(event): - """ - Default event handler. It will print some log and save mode. - - TODO(yuyang18): Complete it! - :param event: - :return: - """ - pass - - -class SGD(object): - """ - Simple SGD Trainer. - SGD Trainer combines data reader, network topolopy and update_equation together - to train/test a neural network. - - :param cost: Target cost that neural network should be optimized. - :type cost: paddle.v2.config_base.Layer - :param parameters: The parameters dictionary. - :type parameters: paddle.v2.parameters.Parameters - :param update_equation: The optimizer object. - :type update_equation: paddle.v2.optimizer.Optimizer - :param extra_layers: Some layers in the neural network graph are not - in the path of cost layer. - :type extra_layers: paddle.v2.config_base.Layer - :param is_local: Whether trainning locally - :type is_local: bool - :param pserver_spec: comma string for pserver location, - eg:127.10.0.10:3000,127.10.0.11:3000, - and this parameter is only used for fault - tolerant mode cluster training. - :type pserver_spec: string - :param use_etcd: Whether using etcd pserver. - :param use_etcd: bool - """ - - def __init__(self, - cost, - parameters, - update_equation, - extra_layers=None, - is_local=True, - pserver_spec=None, - use_etcd=True): - - if not isinstance(parameters, v2_parameters.Parameters): - raise TypeError('parameters should be parameters') - - if not isinstance(update_equation, v2_optimizer.Optimizer): - raise TypeError("update equation parameter must be " - "paddle.v2.optimizer.Optimizer") - import py_paddle.swig_paddle as api - topology = Topology(cost, extra_layers=extra_layers) - # HACK(typhoonzero): update ParameterConfig(proto) in case of optimizers - # are defined after layers, or between layers. 
- topology.update_from_default() - parameters.update_param_conf(topology.proto()) - - self.__optimizer__ = update_equation - self.__topology__ = topology - self.__parameters__ = parameters - self.__topology_in_proto__ = topology.proto() - self.__is_local__ = is_local - self.__pserver_spec__ = pserver_spec - self.__use_etcd__ = use_etcd - - self.__use_sparse_updater__ = self.__topology__.use_sparse_updater() - # # In local mode, disable sparse_remote_update. - if is_local: - for param in self.__topology_in_proto__.parameters: - if param.sparse_remote_update: - param.sparse_remote_update = False - - self.__gm_create_mode__ = api.CREATE_MODE_NORMAL if not \ - self.__use_sparse_updater__ else api.CREATE_MODE_SGD_SPARSE_CPU_TRAINING - self.__data_types__ = topology.data_type() - gm = api.GradientMachine.createFromConfigProto( - self.__topology_in_proto__, self.__gm_create_mode__, - self.__optimizer__.enable_types()) - assert isinstance(gm, api.GradientMachine) - self.__gradient_machine__ = gm - self.__gradient_machine__.randParameters() - self.__parameters__.append_gradient_machine(gm) - self.__parameter_updater__ = None - - def get_topology_proto(self): - return self.__topology_in_proto__ - - def __use_remote_sparse_updater__(self): - return self.__use_sparse_updater__ and not self.__is_local__ - - def __prepare_parameter__(self, in_args): - """ - prepare parameter before forward backward. - 1. When use remote sparse updater, parameters should be got - from ps according to input arguments. - :param in_args: input arguments of this batch. - :return: - """ - if self.__use_remote_sparse_updater__(): - self.__gradient_machine__.prefetch(in_args) - self.__parameter_updater__.getParametersRemote() - - def save_parameter_to_tar(self, f): - self.__parameter_updater__.catchUpWith() - self.__parameter_updater__.apply() - self.__parameter_updater__.getParametersRemote(True, True) - self.__parameters__.to_tar(f) - self.__parameter_updater__.restore() - - def train(self, reader, num_passes=1, event_handler=None, feeding=None): - """ - Training method. Will train num_passes of input data. - - :param reader: A reader that reads and yeilds data items. Usually we use a - batched reader to do mini-batch training. - :type reader: collections.Iterable - :param num_passes: The total train passes. - :param event_handler: Event handler. A method will be invoked when event - occurred. - :type event_handler: (BaseEvent) => None - :param feeding: Feeding is a map of neural network input name and array - index that reader returns. 
- :type feeding: dict|list - :return: - """ - import py_paddle.swig_paddle as api - from data_feeder import DataFeeder - if event_handler is None: - event_handler = default_event_handler - __check_train_args__(**locals()) - - self.__parameter_updater__ = self.__optimizer__.create_updater( - self.__is_local__, num_passes, self.__use_sparse_updater__, - self.__pserver_spec__, self.__use_etcd__) - self.__parameter_updater__.init(self.__gradient_machine__) - - self.__gradient_machine__.start() - batch_evaluator = self.__gradient_machine__.makeEvaluator() - assert isinstance(batch_evaluator, api.Evaluator) - pass_evaluator = self.__gradient_machine__.makeEvaluator() - assert isinstance(pass_evaluator, api.Evaluator) - out_args = api.Arguments.createArguments(0) - feeder = DataFeeder(self.__data_types__, feeding) - for pass_id in xrange(num_passes): - event_handler(v2_event.BeginPass(pass_id)) - pass_evaluator.start() - self.__parameter_updater__.startPass() - for batch_id, data_batch in enumerate(reader()): - batch_evaluator.start() - event_handler( - v2_event.BeginIteration( - pass_id=pass_id, batch_id=batch_id)) - pass_type = self.__parameter_updater__.startBatch( - len(data_batch)) - in_args = feeder(data_batch) - self.__prepare_parameter__(in_args) - self.__gradient_machine__.forwardBackward(in_args, out_args, - pass_type) - self.__gradient_machine__.eval(pass_evaluator) - self.__gradient_machine__.eval(batch_evaluator) - event_handler( - v2_event.EndForwardBackward( - pass_id=pass_id, - batch_id=batch_id, - gm=self.__gradient_machine__)) - for each_param in self.__gradient_machine__.getNonStaticParameters( - ): - self.__parameter_updater__.update(each_param) - cost_sum = out_args.sum() - cost = cost_sum / len(data_batch) - self.__parameter_updater__.finishBatch(cost) - batch_evaluator.finish() - event_handler( - v2_event.EndIteration( - pass_id=pass_id, - batch_id=batch_id, - cost=cost, - evaluator=batch_evaluator, - gm=self.__gradient_machine__)) - - self.__parameter_updater__.finishPass() - pass_evaluator.finish() - event_handler( - v2_event.EndPass( - pass_id, - evaluator=pass_evaluator, - gm=self.__gradient_machine__)) - self.__gradient_machine__.finish() - - def test(self, reader, feeding=None): - """ - Testing method. Will test input data. - - :param reader: A batch reader that reads and yeilds data items, - it should be a paddle.v2.batch. - :type reader: collections.Iterable - :param feeding: Feeding is a map of neural network input name and array - index that reader returns. 
- :type feeding: dict - :return: - """ - import py_paddle.swig_paddle as api - from data_feeder import DataFeeder - feeder = DataFeeder(self.__data_types__, feeding) - evaluator = self.__gradient_machine__.makeEvaluator() - out_args = api.Arguments.createArguments(0) - evaluator.start() - total_cost = 0 - num_samples = 0.0 - for data_batch in reader(): - num_samples += len(data_batch) - in_args = feeder(data_batch) - self.__prepare_parameter__(in_args) - self.__gradient_machine__.forward(in_args, out_args, api.PASS_TEST) - total_cost += out_args.sum() - self.__gradient_machine__.eval(evaluator) - - evaluator.finish() - return v2_event.TestResult( - evaluator=evaluator, cost=total_cost / num_samples) - - -def __check_train_args__(reader, event_handler, **kwargs): - """ - Check train function's argument types - """ - if not callable(reader) or not isinstance(reader(), collections.Iterator): - raise TypeError('train_data_reader should be a function, ' - 'which can return a iterator') - if not callable(event_handler): - raise TypeError('event handler should be a function') -- GitLab