diff --git a/Dockerfile b/Dockerfile
index b6f99ca539d077164c71d797a5ccda7b1b5c44ba..39af60966b6cab7d8b9e644f4ea658613f8ba518 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,7 +30,8 @@ RUN apt-get update && \
     python-numpy python-matplotlib gcc g++ \
     automake locales clang-format-3.8 swig doxygen cmake \
     liblapack-dev liblapacke-dev libboost-dev \
-    clang-3.8 llvm-3.8 libclang-3.8-dev && \
+    clang-3.8 llvm-3.8 libclang-3.8-dev \
+    net-tools && \
     apt-get clean -y
 
 # Install Go
diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 7c22b67775274e31a25c995a248e8e7c654d53fe..f273474e6316d3af272acfbb9b5d6c089fe5f4bb 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -135,7 +135,7 @@ recurrent_group
 ---------------
 .. autoclass:: paddle.v2.layer.recurrent_group
     :noindex:
-
+
 lstm_step
 ---------
 .. autoclass:: paddle.v2.layer.lstm_step
@@ -150,12 +150,12 @@ beam_search
 ------------
 .. autoclass:: paddle.v2.layer.beam_search
     :noindex:
-
+
 get_output
 ----------
 .. autoclass:: paddle.v2.layer.get_output
     :noindex:
-
+
 Mixed Layer
 ===========
 
@@ -208,7 +208,7 @@ trans_full_matrix_projection
 ----------------------------
 .. autoclass:: paddle.v2.layer.trans_full_matrix_projection
     :noindex:
-
+
 Aggregate Layers
 ================
 
@@ -445,10 +445,19 @@ smooth_l1_cost
 .. autoclass:: paddle.v2.layer.smooth_l1_cost
     :noindex:
 
-Check Layer
+Check Layer
 ============
 
 eos
 ---
 .. autoclass:: paddle.v2.layer.eos
     :noindex:
+
+Activation with learnable parameter
+===================================
+
+prelu
+-----
+.. autoclass:: paddle.v2.layer.prelu
+    :noindex:
+
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 6d9365af2d14673146d9e427138bf6dd5f5b41b6..5beced3bb5a1050078f88dfd4350a2df71d27f35 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -632,7 +632,7 @@ void Argument::printValueString(std::ostream& stream,
                                 const std::string& prefix) const {
   std::unordered_map<std::string, std::string> out;
   getValueString(&out);
-  for (auto field : {"value", "id", "sequence pos", "sub-sequence pos"}) {
+  for (auto field : {"value", "ids", "sequence pos", "sub-sequence pos"}) {
     auto it = out.find(field);
     if (it != out.end()) {
       stream << prefix << field << ":\n" << it->second;
diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/pserver/LightNetwork.cpp
index 8c8ba0a2e51b85bde0544c6780b07130336a6bdd..922f25734dee0a6db7fbcfcef3d29d2bad5b7858 100644
--- a/paddle/pserver/LightNetwork.cpp
+++ b/paddle/pserver/LightNetwork.cpp
@@ -383,20 +383,23 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
   setOption(sockfd);
 
   /// Now connect to the server
-  int retry_second = 0;
-  int error = 0;
+  int retry_count = 0;
   do {
-    error = connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr));
-    if (error == ECONNREFUSED) {
+    if (connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) {
+      break;
+    }
+
+    if (errno == ECONNREFUSED) {
       LOG(WARNING) << "connection refused by pserver, try again!";
-      if (retry_second++ >= 7) {
+      if (retry_count++ >= 7) {
         LOG(FATAL) << "connection refused by pserver, maybe pserver failed!";
       }
       std::this_thread::sleep_for(std::chrono::seconds(1));
    } else {
-      PCHECK(error >= 0) << "ERROR connecting to " << serverAddr;
+      PCHECK(errno == 0) << "ERROR connecting to " << serverAddr << ":"
+                         << serverPort << " errno: " << errno;
    }
-  } while (error == ECONNREFUSED);
+  } while (errno == ECONNREFUSED);
 
   channel_.reset(new SocketChannel(sockfd, serverAddr));
   tcpRdma_ = F_TCP;
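# The LightNetwork.cpp hunk above replaces a broken retry loop: connect()
# returns -1 on failure (not ECONNREFUSED), so the old code compared the wrong
# value and never actually retried. The fix inspects errno instead. Below is a
# minimal Python sketch of the intended behavior -- retry while the server
# refuses the connection, give up after a bounded number of attempts, and fail
# fast on any other error. The names here are illustrative, not Paddle APIs.

import errno
import socket
import time


def connect_with_retry(addr, port, max_retries=7):
    for attempt in range(max_retries + 1):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((addr, port))
            return sock  # connected successfully
        except socket.error as e:
            sock.close()
            if e.errno != errno.ECONNREFUSED:
                raise  # non-retryable error: fail immediately
            time.sleep(1)  # server may still be starting; try again shortly
    raise RuntimeError("connection refused %d times, maybe pserver failed!"
                       % (max_retries + 1))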
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 9066ce05f3366787f83af1a1a15396348004f23b..dd72be8fd510c813d134a53d54ef226e709fca92 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -73,7 +73,6 @@
 To use this from paddle_trainer, paddle_trainer should be called with
 --config_args=extension_module_name=[MODULE_NAME]
 '''
-
 import copy
 import logging
 import os
@@ -1731,9 +1730,10 @@ class ParameterReluLayer(LayerBase):
     def __init__(self, name, inputs, partial_sum=1, **args):
         super(ParameterReluLayer, self).__init__(
             name, self.layer_type, 0, inputs=inputs, **args)
-        config_assert(len(self.inputs) == 1)
-        config_assert(self.input_layer.size % partial_sum == 0)
+        config_assert(len(self.inputs) == 1, "prelu layer has only one input.")
         input_layer = self.get_input_layer(0)
+        config_assert(input_layer.size % partial_sum == 0,
+                      "the size of the input layer must be divisible by partial_sum")
         self.set_layer_size(input_layer.size)
         self.create_input_parameter(0, input_layer.size / partial_sum)
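# The ParameterReluLayer hunk above enforces that the input size is divisible
# by partial_sum, because the layer allocates size / partial_sum weights and
# shares each weight across a group of partial_sum elements. A small NumPy
# sketch of that arithmetic follows; the grouping of consecutive elements is an
# assumption for illustration, not a statement about Paddle's memory layout.

import numpy as np


def prelu_forward(x, a, partial_sum):
    # same divisibility requirement as the config_assert above
    assert x.size % partial_sum == 0
    # broadcast each group's weight across its partial_sum elements
    a_full = np.repeat(a, partial_sum)
    return np.where(x > 0, x, a_full * x)


x = np.array([1.0, -2.0, 3.0, -4.0])
a = np.array([0.25, 0.1])  # 4 inputs / partial_sum=2 -> 2 learnable weights
print(prelu_forward(x, a, partial_sum=2))  # [ 1.  -0.5  3.  -0.4]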
""" - DATA = "data" - MIXED_LAYER = "mixed" - LSTMEMORY = "lstmemory" - GRUMEMORY = "gated_recurrent" - SEQUENCE_LAST_INSTANCE = "seqlastins" - SEQUENCE_FIRST_INSTANCE = "seqfirstins" - SEQUENCE_RESHAPE = "seqreshape" - POOLING_MAX = "max" + DATA = 'data' + MIXED_LAYER = 'mixed' + LSTMEMORY = 'lstmemory' + GRUMEMORY = 'gated_recurrent' + SEQUENCE_LAST_INSTANCE = 'seqlastins' + SEQUENCE_FIRST_INSTANCE = 'seqfirstins' + SEQUENCE_RESHAPE = 'seqreshape' + POOLING_MAX = 'max' POOLING_AVG = 'average' - FC_LAYER = "fc" + FC_LAYER = 'fc' COST = 'cost' COSINE_SIM_VEC = 'cos_vm' COSINE_SIM = 'cos' HSIGMOID = 'hsigmoid' - CONV_LAYER = "conv" - CONVTRANS_LAYER = "convt" - EXCONV_LAYER = "exconv" - EXCONVTRANS_LAYER = "exconvt" - CUDNNCONV_LAYER = "cudnn_conv" - POOL_LAYER = "pool" + CONV_LAYER = 'conv' + CONVTRANS_LAYER = 'convt' + EXCONV_LAYER = 'exconv' + EXCONVTRANS_LAYER = 'exconvt' + CUDNNCONV_LAYER = 'cudnn_conv' + POOL_LAYER = 'pool' BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' @@ -190,25 +191,19 @@ class LayerType(object): PAD_LAYER = "pad" MULTIPLEX_LAYER = "multiplex" ROW_CONV_LAYER = "row_conv" - - PRINT_LAYER = "print" - PRIORBOX_LAYER = "priorbox" - - CTC_LAYER = "ctc" - WARP_CTC_LAYER = "warp_ctc" - CRF_LAYER = "crf" - CRF_DECODING_LAYER = "crf_decoding" NCE_LAYER = 'nce' - RANK_COST = "rank-cost" - LAMBDA_COST = "lambda_cost" - HUBER = "huber" - CROSS_ENTROPY = "multi-class-cross-entropy" - CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm" - SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy" - MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy" - SUM_COST = "sum_cost" - SMOOTH_L1 = "smooth_l1" + RANK_COST = 'rank-cost' + LAMBDA_COST = 'lambda_cost' + HUBER = 'huber' + CROSS_ENTROPY = 'multi-class-cross-entropy' + CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' + SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' + MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' + SUM_COST = 'sum_cost' + SMOOTH_L1 = 'smooth_l1' + + PRELU = 'prelu' @staticmethod def is_layer_type(type_name): @@ -3862,7 +3857,6 @@ def classification_cost(input, label, weight=None, name=None, - top_k=None, evaluator=classification_error_evaluator, layer_attr=None): """ @@ -3877,8 +3871,6 @@ def classification_cost(input, :param weight: The weight affects the cost, namely the scale of cost. It is an optional argument. :type weight: LayerOutput - :param top_k: number k in top-k error rate - :type top_k: int :param evaluator: Evaluator method. :param layer_attr: layer's extra attribute. :type layer_attr: ExtraLayerAttribute @@ -3906,7 +3898,7 @@ def classification_cost(input, assert isinstance(e.for_classification, bool) assert e.for_classification - e(name=e.__name__, input=input, label=label, weight=weight, top_k=top_k) + e(name=e.__name__, input=input, label=label, weight=weight) if not isinstance(evaluator, collections.Sequence): evaluator = [evaluator] @@ -4727,7 +4719,7 @@ def ctc_layer(input, fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer should also be num_classes + 1. - The simple usage: + The example usage is: .. code-block:: python @@ -4814,7 +4806,7 @@ def warp_ctc_layer(input, - As a native 'softmax' activation is interated to the warp-ctc library, 'linear' activation is expected instead in the 'input' layer. - The simple usage: + The example usage is: .. 
@@ -4727,7 +4719,7 @@ def ctc_layer(input,
     fc_layer with softmax activation, should be num_classes + 1. The size of
     ctc_layer should also be num_classes + 1.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -4814,7 +4806,7 @@ def warp_ctc_layer(input,
     - As a native 'softmax' activation is interated to the warp-ctc library,
       'linear' activation is expected instead in the 'input' layer.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -4875,7 +4867,7 @@ def crf_layer(input,
     A layer for calculating the cost of sequential conditional random field
     model.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -4949,7 +4941,7 @@ def crf_decoding_layer(input,
     this layer will also calculate error. output.value[i] is 1 for incorrect
     decoding or 0 for correct decoding.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -5142,7 +5134,7 @@ def rank_cost(left,
     - :math:`o_i` and :math:`o_j`: the left output and right output.
       Their dimension is one.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -5199,7 +5191,7 @@ def lambda_cost(input,
     """
     lambdaCost for lambdaRank LTR approach.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -5257,6 +5249,8 @@ def cross_entropy(input,
     """
     A loss layer for multi class entropy.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = cross_entropy(input=input_layer,
@@ -5303,6 +5297,8 @@ def cross_entropy_with_selfnorm(input,
     A loss layer for multi class entropy with selfnorm.
     Input should be a vector of positive numbers, without normalization.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = cross_entropy_with_selfnorm(input=input_layer,
@@ -5344,6 +5340,8 @@ def sum_cost(input, name=None, layer_attr=None):
     """
     A loss layer which calculate the sum of the input as loss
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = sum_cost(input=input_layer)
@@ -5373,6 +5371,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
     """
     A loss layer for huber loss.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = huber_cost(input=input_layer,
@@ -5413,6 +5413,8 @@ def multi_binary_label_cross_entropy(input,
     """
     A loss layer for multi binary label cross entropy.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = multi_binary_label_cross_entropy(input=input_layer,
@@ -5472,6 +5474,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
     More details can be found by referring to `Fast R-CNN
     <https://arxiv.org/abs/1504.08083>`_
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = smooth_l1_cost(input=input_layer,
@@ -5521,6 +5525,8 @@ def multiplex_layer(input, name=None, layer_attr=None):
     where, y is output. :math:`x_{k}` is the k-th input layer and
     :math:`k = x_{0}[i] + 1`.
 
+    The example usage is:
+
     .. code-block:: python
 
        maxid = multiplex_layer(input=layers)
@@ -5627,3 +5633,63 @@ def row_conv_layer(input,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name, LayerType.ROW_CONV_LAYER, input, activation=act, size=input.size)
+
+
+@layer_support()
+@wrap_name_default()
+@wrap_param_attr_default()
+def prelu_layer(input,
+                name=None,
+                partial_sum=1,
+                param_attr=None,
+                layer_attr=None):
+    """
+    The Parametric ReLU activation: positive inputs pass through unchanged,
+    while negative inputs are scaled by a learnable weight.
+
+    Reference:
+        Delving Deep into Rectifiers: Surpassing Human-Level Performance on
+        ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf
+
+    .. math::
+
+       prelu(z_i) =
+       \\begin{cases}
+       z_i,      & z_i > 0 \\\\
+       a_i z_i,  & \\mathrm{otherwise}
+       \\end{cases}
+
+    The example usage is:
+
+    .. code-block:: python
+
+       prelu = prelu_layer(input=layers, partial_sum=1)
+
+    :param name: Name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param partial_sum: this parameter makes a group of inputs share the same weight.
+
+        - partial_sum = 1: element-wise activation, each element has its own weight.
+        - partial_sum = number of elements in one channel: channel-wise activation,
+          all elements in a channel share the same weight.
+        - partial_sum = number of outputs: all elements share one single weight.
+
+    :type partial_sum: int
+    :param param_attr: The parameter attribute. See ParameterAttribute for details.
+    :type param_attr: ParameterAttribute|None
+    :param layer_attr: Extra layer configurations. Default is None.
+    :type layer_attr: ExtraLayerAttribute|None
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+
+    assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input.'
+    assert isinstance(param_attr, ParameterAttribute)
+
+    l = Layer(
+        name=name,
+        type=LayerType.PRELU,
+        inputs=Input(input.name, **param_attr.attr),
+        partial_sum=partial_sum,
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.PRELU,
+        parents=input,
+        size=l.config.size)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index db3d3c655056d5b5c74122988d6566500a7ae043..c24102255f5bbed0f551b2dbfec20be7daf5f5b4 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers
 test_cost_layers test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_row_conv)
+test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
+test_prelu_layer test_row_conv)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..64d227565f2b21ff43d4391c682ca90c0f47908e
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
@@ -0,0 +1,36 @@
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__prelu_layer_0__"
+  type: "prelu"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+    input_parameter_name: "___prelu_layer_0__.w0"
+  }
+}
+parameters {
+  name: "___prelu_layer_0__.w0"
+  size: 300
+  initial_mean: 0.0
+  initial_std: 0.057735026919
+  initial_strategy: 0
+  initial_smart: true
+}
+input_layer_names: "input"
+output_layer_names: "__prelu_layer_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "__prelu_layer_0__"
+  input_layer_names: "input"
+  output_layer_names: "__prelu_layer_0__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e3057f323db22ffc3911cce30ec2e8bb95e3dbe
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
@@ -0,0 +1,6 @@
+from paddle.trainer_config_helpers import *
+
+data = data_layer(name='input', size=300)
+prelu = prelu_layer(input=data)
+
+outputs(prelu)
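# A quick cross-check of the numbers in the protostr above, tying them back to
# create_input_parameter(0, input_layer.size / partial_sum): the test config
# uses size=300 with the default partial_sum=1, so the prelu parameter also has
# size 300. The initial_std value matches 1/sqrt(300), which looks like the
# usual 1/sqrt(fan-in) default init -- an observation from these numbers, not
# a documented guarantee.

import math

input_size = 300
for partial_sum in (1, 100, 300):
    assert input_size % partial_sum == 0
    print(partial_sum, input_size // partial_sum)  # -> 300, 3, 1 weights

print(1.0 / math.sqrt(300))  # 0.05773502691896258, cf. initial_std above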
diff --git a/python/paddle/utils/image_multiproc.py b/python/paddle/utils/image_multiproc.py
index 6ce32f7811d6be6864a567cf41bf408f422409a7..e8db525ff5c388aef1a39d8db56633d509cb4fb9 100644
--- a/python/paddle/utils/image_multiproc.py
+++ b/python/paddle/utils/image_multiproc.py
@@ -12,7 +12,7 @@ from paddle.trainer.config_parser import logger
 try:
     import cv2
 except ImportError:
-    logger.warning("OpenCV2 is not installed, using PIL to prcoess")
+    logger.warning("OpenCV2 is not installed, using PIL to process")
     cv2 = None
 
 __all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"]
diff --git a/python/setup.py.in b/python/setup.py.in
index d1c38823080fb3a5c879d8b59cb5371c07902e57..93724f918801ea706517a1df158ceb78a1c2335c 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -11,17 +11,19 @@ packages=['paddle',
           'paddle.v2.reader',
           'paddle.v2.plot']
 
+install_requires=["requests",
+                  "numpy",
+                  "protobuf==3.1",
+                  "matplotlib",
+                  "rarfile"]
+
+if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
+    install_requires+=["opencv-python"]
+
 setup(name='paddle',
       version='${PADDLE_VERSION}',
       description='Parallel Distributed Deep Learning',
-      install_requires=[
-          "requests",
-          "numpy",
-          "protobuf==${PROTOBUF_VERSION}",
-          "matplotlib",
-          "opencv-python",
-          "rarfile"
-      ],
+      install_requires=install_requires,
       packages=packages,
       package_dir={
           '': '${CMAKE_CURRENT_SOURCE_DIR}'
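# The setup.py.in hunk above makes opencv-python conditional: CMake substitutes
# ${CMAKE_SYSTEM_PROCESSOR} before setup.py runs, and prebuilt OpenCV wheels
# are generally unavailable on ARM targets. A minimal sketch of how the
# substituted check behaves (the processor strings are illustrative):

install_requires = ["requests", "numpy", "protobuf==3.1", "matplotlib",
                    "rarfile"]

for processor in ("x86_64", "aarch64"):
    deps = list(install_requires)
    if processor not in ['arm', 'armv7-a', 'aarch64']:
        deps += ["opencv-python"]  # only non-ARM builds pull in OpenCV
    print(processor, "opencv-python" in deps)  # x86_64 True, aarch64 False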