diff --git a/demo/quick_start/api_predict.sh b/demo/quick_start/api_predict.sh
index c90d3b70548b3ef2a7e0e423c74cd97f1886c0fc..4d9aa9e8854ed79446a47dbc593f419cdda077b4 100755
--- a/demo/quick_start/api_predict.sh
+++ b/demo/quick_start/api_predict.sh
@@ -17,7 +17,7 @@ set -e
 #Note the default model is pass-00002, you shold make sure the model path
 #exists or change the mode path.
 #only test on trainer_config.lr.py
-model=output/pass-00001/
+model=output/model/pass-00001/
 config=trainer_config.lr.py
 label=data/labels.list
 dict=data/dict.txt
diff --git a/demo/quick_start/cluster/cluster_train.sh b/demo/quick_start/cluster/cluster_train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..aac9b89b14b98ac8e2db7def19e5f06c01682493
--- /dev/null
+++ b/demo/quick_start/cluster/cluster_train.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+
+# Run pserver.sh before running this script.
+bin_dir=$(cd `dirname $0`; pwd)
+home_dir=$(cd "${bin_dir}/.."; pwd)
+source "$bin_dir/env.sh"
+
+model_dir="$bin_dir/output"
+log_file="$bin_dir/train.log"
+
+pushd "$home_dir"
+cfg=trainer_config.lr.py
+paddle train \
+  --config=$cfg \
+  --save_dir=${model_dir} \
+  --trainer_count=4 \
+  --local=0 \
+  --log_period=100 \
+  --num_passes=15 \
+  --use_gpu=false \
+  --show_parameter_stats_period=100 \
+  --test_all_data_in_one_period=1 \
+  --num_gradient_servers=1 \
+  --nics=`get_nics` \
+  --port=7164 \
+  --ports_num=1 \
+  --pservers="127.0.0.1" \
+  --comment="paddle_trainer" \
+  2>&1 | tee "$log_file"
+popd
diff --git a/demo/quick_start/cluster/env.sh b/demo/quick_start/cluster/env.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a404993835d0e479f65c89c5561855293b7b66f0
--- /dev/null
+++ b/demo/quick_start/cluster/env.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+
+function get_nics() {
+  machine=`uname -s`
+  local nics=""
+  if [ "$machine" == "Linux" ]; then
+    nics="lo"
+  elif [ "$machine" == "Darwin" ]; then
+    nics="lo0"
+  else
+    nics="unsupport"
+  fi
+  echo $nics
+}
diff --git a/demo/quick_start/cluster/pserver.sh b/demo/quick_start/cluster/pserver.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b187c1d9b9108a607ed310253d54ecc096f0e792
--- /dev/null
+++ b/demo/quick_start/cluster/pserver.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+bin_dir=$(cd `dirname $0`; pwd)
+source "$bin_dir/env.sh"
+
+paddle pserver \
+  --nics=`get_nics` \
+  --port=7164 \
+  --ports_num=1 \
+  --ports_num_for_sparse=1 \
+  --num_gradient_servers=1 \
+  --comment="paddle_pserver" \
+  2>&1 | tee 'pserver.log'
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index 23f681d9e5334aaadd909ef48b1321f6724132bd..e83718448ddfb9154fa4e87bbdd91e7a19eb5f62 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -20,15 +20,11 @@ limitations under the License. */
 #include <string>
 #include <vector>
 #include "paddle/utils/GlobalConstants.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 /// Import PaddlePaddle's enumeration into global namespace.
 using namespace paddle::enumeration_wrapper;  // NOLINT
 
-#define DISABLE_COPY_AND_ASSIGN(classname) \
-  classname(const classname& other);       \
-  classname& operator=(const classname& other)
-
 /**
  * @brief Initialize paddle.
  *
@@ -102,7 +98,7 @@ const size_t NO_SPARSE_ID = -1UL;
 struct MatrixPrivate;
 class Matrix {
   Matrix();  // User Cannot Create Matrix.
-  DISABLE_COPY_AND_ASSIGN(Matrix);
+  DISABLE_COPY(Matrix);
   static Matrix* createByPaddleMatrixPtr(void* sharedPtr);
 
 public:
@@ -242,7 +238,7 @@ private:
 
 struct VectorPrivate;
 class Vector {
-  DISABLE_COPY_AND_ASSIGN(Vector);
+  DISABLE_COPY(Vector);
   Vector();
   static Vector* createByPaddleVectorPtr(void* ptr);
 
@@ -322,7 +318,7 @@ private:
 struct IVectorPrivate;
 class IVector {
   IVector();
-  DISABLE_COPY_AND_ASSIGN(IVector);
+  DISABLE_COPY(IVector);
   static IVector* createByPaddleVectorPtr(void* ptr);
 
 public:
@@ -402,7 +398,7 @@ struct ArgumentsPrivate;
 class Arguments {
 private:
   Arguments();  // Internal Create.
-  DISABLE_COPY_AND_ASSIGN(Arguments);
+  DISABLE_COPY(Arguments);
 
 public:
   /**
@@ -472,7 +468,7 @@ enum GradientMatchineCreateMode {
 
 struct ParameterConfigPrivate;
 class ParameterConfig {
-  DISABLE_COPY_AND_ASSIGN(ParameterConfig);
+  DISABLE_COPY(ParameterConfig);
   ParameterConfig();
 
   /**
@@ -502,7 +498,7 @@ private:
 
 struct OptimizationConfigPrivate;
 class OptimizationConfig {
-  DISABLE_COPY_AND_ASSIGN(OptimizationConfig);
+  DISABLE_COPY(OptimizationConfig);
   OptimizationConfig();
 
 public:
@@ -527,7 +523,7 @@ struct ParameterPrivate;
 class Parameter {
 private:
   Parameter();
-  DISABLE_COPY_AND_ASSIGN(Parameter);
+  DISABLE_COPY(Parameter);
 
 public:
   virtual ~Parameter();
@@ -572,7 +568,7 @@ struct ModelConfigPrivate;
 class ModelConfig {
 private:
   ModelConfig();
-  DISABLE_COPY_AND_ASSIGN(ModelConfig);
+  DISABLE_COPY(ModelConfig);
 
 public:
   virtual ~ModelConfig();
@@ -593,7 +589,7 @@ struct TrainerConfigPrivate;
 class TrainerConfig {
 private:
   TrainerConfig();
-  DISABLE_COPY_AND_ASSIGN(TrainerConfig);
+  DISABLE_COPY(TrainerConfig);
 
 public:
   virtual ~TrainerConfig();
@@ -633,7 +629,7 @@ public:
 
 struct ParameterTraverseCallbackPrivate;
 class ParameterTraverseCallback {
-  DISABLE_COPY_AND_ASSIGN(ParameterTraverseCallback);
+  DISABLE_COPY(ParameterTraverseCallback);
   ParameterTraverseCallback();
 
 public:
@@ -655,7 +651,7 @@ private:
  */
 struct ParameterOptimizerPrivate;
 class ParameterOptimizer {
-  DISABLE_COPY_AND_ASSIGN(ParameterOptimizer);
+  DISABLE_COPY(ParameterOptimizer);
   ParameterOptimizer();
 
 public:
@@ -692,7 +688,7 @@ struct GradientMachinePrivate;
 class GradientMachine {
 private:
   GradientMachine();
-  DISABLE_COPY_AND_ASSIGN(GradientMachine);
+  DISABLE_COPY(GradientMachine);
 
 public:
   virtual ~GradientMachine();
@@ -908,7 +904,7 @@ private:
   TrainerPrivate* m;
   Trainer();
   Trainer(TrainerConfig* optConfig, GradientMachine* gm);
-  DISABLE_COPY_AND_ASSIGN(Trainer);
+  DISABLE_COPY(Trainer);
 
 public:
   virtual ~Trainer();
@@ -974,7 +970,7 @@ public:
 
 struct SequenceGeneratorPrivate;
 class SequenceGenerator {
-  DISABLE_COPY_AND_ASSIGN(SequenceGenerator);
+  DISABLE_COPY(SequenceGenerator);
   SequenceGenerator();
 
 public:
diff --git a/paddle/api/paddle_ld_flags.py b/paddle/api/paddle_ld_flags.py
index 7c8206e3fe09704debf5268f02128cc59e72af8d..b4d27b1cc728f92b2210f30b69f3f5899fe81d65 100644
--- a/paddle/api/paddle_ld_flags.py
+++ b/paddle/api/paddle_ld_flags.py
@@ -141,9 +141,12 @@ try:
 
         def c_flag(self):
             if self.with_coverage:
-                return ["-fprofile-arcs", "-ftest-coverage", "-O0", "-g"]
+                return [
+                    "-fprofile-arcs", "-ftest-coverage", "-O0", "-g",
+                    "-std=c++11"
+                ]
             else:
-                return None
+                return ["-std=c++11"]
 except ImportError:
 
     class PaddleLDFlag(object):
diff --git a/paddle/cuda/include/hl_base.h b/paddle/cuda/include/hl_base.h
index 84c5f2d5c91feb7896643d2c5f60a279ebe944e7..5b9884b786530aee91312c7547496c94cd7a89cb 100644
--- a/paddle/cuda/include/hl_base.h
+++ b/paddle/cuda/include/hl_base.h
@@ -16,7 +16,31 @@ limitations under the License. */
 #define HL_BASE_H_
 
 #include <cstddef>
-#include "paddle/utils/TypeDefs.h"
+
+#ifdef PADDLE_TYPE_DOUBLE
+#define HL_FLOAT_MAX 1.7976931348623157e+308
+#define HL_FLOAT_MIN 2.2250738585072014e-308
+using real = double;
+#else
+#define HL_FLOAT_MAX 3.40282347e+38F
+#define HL_FLOAT_MIN 1.17549435e-38F
+using real = float;
+#endif
+
+/**
+ * The maximum input value for exp, used to avoid the overflow problem.
+ * Currently only used for the tanh function.
+ */
+#define EXP_MAX_INPUT 40.0
+
+/**
+ * @brief DIVUP(x, y) is similar to ceil(x / y).
+ * @note  For CUDA, DIVUP will be used to specify
+ *        the size of blockDim.
+ */
+#ifndef DIVUP
+#define DIVUP(x, y) (((x) + (y)-1) / (y))
+#endif
 
 /**
  * HPPL is an internal high performance parallel computing library
@@ -181,46 +205,6 @@ typedef struct {
   size_t nnz;
 } _hl_sparse_matrix_s, *hl_sparse_matrix_s;
 
-#ifndef PADDLE_TYPE_DOUBLE
-/**
- * HPPL data type: real (float or double)
- *
- * if real == float
- *
- * HL_FLOAT_MAX: 3.40282347e+38F
- *
- * HL_FLOAT_MIN: 1.17549435e-38F
- */
-#define HL_FLOAT_MAX 3.40282347e+38F
-/**
- * if real == double
- *
- * HL_FLOAT_MAX: 1.7976931348623157e+308
- *
- * HL_FLOAT_MIN: 2.2250738585072014e-308
- */
-#define HL_FLOAT_MIN 1.17549435e-38F
-#else
-#define HL_FLOAT_MAX 1.7976931348623157e+308
-#define HL_FLOAT_MIN 2.2250738585072014e-308
-#endif
-
-/**
- * The maximum input value for exp, used to avoid overflow problem.
- *
- * Currently only used for tanh function.
- */
-#define EXP_MAX_INPUT 40.0
-
-/**
- * @brief DIVUP(x, y) is similar to ceil(x / y).
- * @note  For CUDA, DIVUP will be used to specify
- *        the size of blockDim.
- */
-#ifndef DIVUP
-#define DIVUP(x, y) (((x) + (y)-1) / (y))
-#endif
-
 #ifdef __NVCC__
 
 #include "cuda_runtime.h"
diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/gserver/dataproviders/DataProvider.h
index 9b7f7e36cedaa230ae0694d87cc033bd6fa6e652..5f031fc7c0761a8fe97eb16fe1dd8e0a1debfcdb 100644
--- a/paddle/gserver/dataproviders/DataProvider.h
+++ b/paddle/gserver/dataproviders/DataProvider.h
@@ -34,8 +34,8 @@ limitations under the License. */
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Queue.h"
 #include "paddle/utils/ThreadLocal.h"
-#include "paddle/utils/TypeDefs.h"
 #include "paddle/utils/Util.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 /**
diff --git a/paddle/gserver/layers/GruCompute.h b/paddle/gserver/layers/GruCompute.h
index 42c0019319ac9f20f9c3349fb2429c30f03d682b..a56af21317d1d43c836f7fe599a4dc614804bfec 100644
--- a/paddle/gserver/layers/GruCompute.h
+++ b/paddle/gserver/layers/GruCompute.h
@@ -16,7 +16,7 @@ limitations under the License. */
 
 #include "ModelConfig.pb.h"
 #include "hl_gpu.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/gserver/layers/LstmCompute.h b/paddle/gserver/layers/LstmCompute.h
index 140a4c6ecf5cfaf1045cec3ca2db5d4f2e54aca4..0d65b4158ebdc04f199048bbba98317c89fc8beb 100644
--- a/paddle/gserver/layers/LstmCompute.h
+++ b/paddle/gserver/layers/LstmCompute.h
@@ -16,7 +16,7 @@ limitations under the License. */
 
 #include "ModelConfig.pb.h"
 #include "hl_gpu.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/gserver/layers/MultinomialSampler.h b/paddle/gserver/layers/MultinomialSampler.h
index 677b047029305549084770bdb5eadfeaafbfac8a..b48073c80b6f57cd86ceb80b9d749548c3acc1ac 100644
--- a/paddle/gserver/layers/MultinomialSampler.h
+++ b/paddle/gserver/layers/MultinomialSampler.h
@@ -16,7 +16,7 @@ limitations under the License. */
 
 #include <memory>
 #include <random>
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h
index 2933c20fbad930248c41969d88d45cf397b9dcf8..8f9bc9e823eb8062535920361899ce3cc06ec3a7 100644
--- a/paddle/math/BaseMatrix.h
+++ b/paddle/math/BaseMatrix.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <stdint.h>
 #include <cstddef>
 #include "TensorExpression.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 25ce09e346694298e4901e52ab1ec6a3a8044263..bda863de38675fe481544a7e82b69f445df361bd 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -27,7 +27,7 @@ limitations under the License. */
 #include "MemoryHandle.h"
 #include "Vector.h"
 #include "paddle/utils/ThreadLocal.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/math/TensorExpression.h b/paddle/math/TensorExpression.h
index 9bd789e8c511f33d8415e421281e99eb10fc63fe..f3d60e400380f7d7d645559318837b0d7706661d 100644
--- a/paddle/math/TensorExpression.h
+++ b/paddle/math/TensorExpression.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include <cstddef>
 #include "hl_tensor_ops.h"
 #include "paddle/utils/Logging.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/math/Vector.h b/paddle/math/Vector.h
index 8a24103bd4107035c8068c24ec3be6ec06957112..b4347a70f874a2a1bf933bbea4d1b15385f36090 100644
--- a/paddle/math/Vector.h
+++ b/paddle/math/Vector.h
@@ -22,7 +22,7 @@ limitations under the License. */
 #include "BaseMatrix.h"
 #include "MemoryHandle.h"
 #include "paddle/utils/Thread.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/parameter/ParallelParameter.h b/paddle/parameter/ParallelParameter.h
index 417e386dc74d308a6c0aefa2640f0f37de8dbf1f..1ee220d2dc1a26b3f394ca673975cc827f450206 100644
--- a/paddle/parameter/ParallelParameter.h
+++ b/paddle/parameter/ParallelParameter.h
@@ -28,7 +28,7 @@ limitations under the License. */
 #include "paddle/parameter/ParameterUpdateFunctions.h"
 #include "paddle/utils/Flags.h"
 #include "paddle/utils/Locks.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 #include "ParameterConfig.pb.h"
 
diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h
index 532c6770e596c33dfe7fd42f32157b2c6c19e18e..e05137b315f254795de26a5ff0ac977e7968f4d8 100644
--- a/paddle/parameter/Parameter.h
+++ b/paddle/parameter/Parameter.h
@@ -29,8 +29,8 @@ limitations under the License. */
 #include "paddle/utils/GlobalConstants.h"
 #include "paddle/utils/Locks.h"
 #include "paddle/utils/ThreadLocal.h"
-#include "paddle/utils/TypeDefs.h"
 #include "paddle/utils/Util.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/parameter/ParameterUpdateFunctions.h b/paddle/parameter/ParameterUpdateFunctions.h
index 2d277e47e7eafc118fa37343e93e8a331a260aa9..2cb379871716ffd9e75eede607276b6b3f200e6b 100644
--- a/paddle/parameter/ParameterUpdateFunctions.h
+++ b/paddle/parameter/ParameterUpdateFunctions.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/math/Vector.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/pserver/BaseClient.h b/paddle/pserver/BaseClient.h
index 262afafbe2d61305a158d945fac2d3b265012cbd..ccf05ae1ca3ab76fbe9d36237969207768de4dd2 100644
--- a/paddle/pserver/BaseClient.h
+++ b/paddle/pserver/BaseClient.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/math/Matrix.h"
 #include "paddle/pserver/ProtoServer.h"
 #include "paddle/utils/Queue.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 namespace paddle {
 
diff --git a/paddle/pserver/ParameterClient2.h b/paddle/pserver/ParameterClient2.h
index eed71ccb43b0fec76a74a7f00662c32c97c26ff4..70cfc6d70072f399ef97eef1a0e6111a127cbd9f 100644
--- a/paddle/pserver/ParameterClient2.h
+++ b/paddle/pserver/ParameterClient2.h
@@ -26,8 +26,8 @@ limitations under the License. */
 #include "paddle/utils/Flags.h"
 #include "paddle/utils/Locks.h"
 #include "paddle/utils/Queue.h"
-#include "paddle/utils/TypeDefs.h"
 #include "paddle/utils/Util.h"
+#include "paddle/utils/common.h"
 
 #include "ParameterService.pb.h"
 
diff --git a/paddle/pserver/ParameterServer2.h b/paddle/pserver/ParameterServer2.h
index b0cf22e1fb158e76fcee1ce6ef1f375995803ce6..79d1eb97ff149f4f5ca9a924c1b0b7ba629f1e33 100644
--- a/paddle/pserver/ParameterServer2.h
+++ b/paddle/pserver/ParameterServer2.h
@@ -32,7 +32,7 @@ limitations under the License. */
 #include "paddle/utils/Locks.h"
 #include "paddle/utils/Stat.h"
 #include "paddle/utils/ThreadLocal.h"
-#include "paddle/utils/TypeDefs.h"
+#include "paddle/utils/common.h"
 
 #include "ParameterService.pb.h"
 
diff --git a/paddle/setup.py.in b/paddle/setup.py.in
index b4c38a41b86683f89b6d02e9db97b75e9dca89ea..464ad632868bd1fd4d88547212421302ca0b2116 100644
--- a/paddle/setup.py.in
+++ b/paddle/setup.py.in
@@ -30,8 +30,10 @@ is_lin = (system == 'linux')
 # The extra links will passed from COMAKE
 #   because generate paddle LDFLAGS is too complicated to do in setup.py
 #   it just read COMAKE generated LDFLAGS.
+extra_comps = []
 extra_links = []
 obj = api.paddle_ld_flags.PaddleLDFlag()
+extra_comps = obj.c_flag()
 ldflags = obj.ldflag_str()
 if ldflags is not None:
   extra_links.extend(ldflags.split(" "))
@@ -51,20 +53,15 @@ elif is_osx == True:
 
 include_dirs = [np.get_include(), "../"]    # include numpy and paddle.
 
-extra_c = obj.c_flag()
-
-attr=dict()
-if extra_c is not None:
-  attr["extra_compile_args"] = extra_c
-
 setup(name="py_paddle",
   version="@PADDLE_VERSION@",
   ext_modules=[
     Extension('py_paddle._swig_paddle',      # Build SWIG Extension.
        ['Paddle_wrap.cxx'],
+       language = "c++",
        include_dirs = include_dirs,
        extra_link_args = extra_links,
-       **attr
+       extra_compile_args = extra_comps
     )
   ],
   packages=['py_paddle'],
diff --git a/paddle/trainer/ThreadParameterUpdater.h b/paddle/trainer/ThreadParameterUpdater.h
index 880f1f9ddc49a1193ce23901419d988cae84eb88..bc08a9e9f0eda1cab7776ba76c67e88add1028a9 100644
--- a/paddle/trainer/ThreadParameterUpdater.h
+++ b/paddle/trainer/ThreadParameterUpdater.h
@@ -33,8 +33,8 @@ namespace paddle {
    because at the current moment, the merging on CPU is happening on the
    main thread, and the its parameter size can be much larger than the one GPU.
    Thus, for GPU, the parameter updates happens in updateImpl() function, which
-   is called by gradient machines as a callback function as a callback function
-   supplied to backward() and forwardBackward().
+   is called by gradient machines as a callback function supplied to backward()
+   and forwardBackward().
    For CPU, the parameter updates happens in separate threads maintained by this
    class.
  */
diff --git a/paddle/utils/CpuId.h b/paddle/utils/CpuId.h
index 7a354da75851ed7cca4e85e77714624634951f00..1218e8194c4e837ca880744f92e769a68ba474de 100644
--- a/paddle/utils/CpuId.h
+++ b/paddle/utils/CpuId.h
@@ -11,7 +11,7 @@ limitations under the License. */
 
 #pragma once
 
-#include "DisableCopy.h"
+#include "common.h"
 
 namespace paddle {
 
diff --git a/paddle/utils/DisableCopy.h b/paddle/utils/DisableCopy.h
deleted file mode 100644
index 41de98bbde664651803c8db4c0cd7216b2ff4231..0000000000000000000000000000000000000000
--- a/paddle/utils/DisableCopy.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-/**
- * Disable copy macro.
- */
-#define DISABLE_COPY(CLASS_NAME)                \
-  CLASS_NAME(CLASS_NAME &&) = delete;           \
-  CLASS_NAME(const CLASS_NAME &other) = delete; \
-  CLASS_NAME &operator=(const CLASS_NAME &other) = delete
diff --git a/paddle/utils/Locks.h b/paddle/utils/Locks.h
index 0f922f3548d97eb16ca897564faf1bf083f0d5ac..a21872e89ebc172b87c8b5c3731a89302f34f521 100644
--- a/paddle/utils/Locks.h
+++ b/paddle/utils/Locks.h
@@ -19,7 +19,7 @@ limitations under the License. */
 #include <condition_variable>
 #include <mutex>
 
-#include "DisableCopy.h"
+#include "common.h"
 
 namespace paddle {
 
diff --git a/paddle/utils/Util.h b/paddle/utils/Util.h
index e5a89070f1a953d70a43321cb5417656c907ee9d..dc15ada5862d648af27aa1b0e8c8a5cce012ded8 100644
--- a/paddle/utils/Util.h
+++ b/paddle/utils/Util.h
@@ -26,12 +26,11 @@ limitations under the License. */
 #include <unordered_map>
 #include <vector>
 
-#include "DisableCopy.h"
 #include "Logging.h"
 #include "TrainerConfig.pb.h"
+#include "common.h"
 
 #include "Flags.h"
-#include "TypeDefs.h"
 #include "hl_gpu.h"
 
 /**
diff --git a/paddle/utils/Version.h b/paddle/utils/Version.h
index d1a07d9485076e5382d47f7408fcbf032166b1ed..aa5df3243893145dbcc7e7ef2592555fc1c88fc9 100644
--- a/paddle/utils/Version.h
+++ b/paddle/utils/Version.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include <stddef.h>
 #include <iostream>
-#include "TypeDefs.h"
+#include "common.h"
 
 namespace paddle {
 
diff --git a/paddle/utils/TypeDefs.h b/paddle/utils/common.h
similarity index 71%
rename from paddle/utils/TypeDefs.h
rename to paddle/utils/common.h
index c50a05e82daefd1273c896f3603957f4484ecd5d..3ff0b869478832d023956d190ddeb89a69acf58b 100644
--- a/paddle/utils/TypeDefs.h
+++ b/paddle/utils/common.h
@@ -15,12 +15,19 @@ limitations under the License. */
 #pragma once
 
 namespace paddle {
+
+/**
+ * Disable copy macro.
+ */
+#define DISABLE_COPY(class_name)                \
+  class_name(class_name &&) = delete;           \
+  class_name(const class_name &other) = delete; \
+  class_name &operator=(const class_name &other) = delete
+
 #ifdef PADDLE_TYPE_DOUBLE
-typedef double real;
+using real = double;
 #else
-typedef float real;
+using real = float;
 #endif
 
 }  // namespace paddle
-
-using paddle::real;
diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py
index ef9859f8313efc09fe59f6eb5df92568e9413a40..13155ebddbb49c502d9d4110704ab09f49825be2 100644
--- a/python/paddle/trainer_config_helpers/__init__.py
+++ b/python/paddle/trainer_config_helpers/__init__.py
@@ -21,6 +21,5 @@ from networks import *
 from optimizers import *
 from attrs import *
 from config_parser_utils import *
-
 # This will enable operator overload for LayerOutput
-import math as layer_math
+import layer_math
diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py
index 59bb18bfcab30540bd38ca8d1cb300813d30fee8..bf0208834600fef3bcf1b0496da8f5f77aea44c5 100644
--- a/python/paddle/trainer_config_helpers/attrs.py
+++ b/python/paddle/trainer_config_helpers/attrs.py
@@ -19,34 +19,34 @@ __all__ = [
 
 
 def convert_and_compare(x, Type):
-    """                                                                                                                                                                                                
-    Convert x to be the same type as Type and then convert back to                                                                                                                                      
-    check whether there is a loss of information                                                                                                                                                        
-    :param x: object to be checked                                                                                                                                                                      
-    :param Type: target type to check x over                                                                                                                                                           
-    
+    """
+    Convert x to be the same type as Type and then convert back to
+    check whether there is a loss of information
+    :param x: object to be checked
+    :param Type: target type to check x over
+
     """
     return type(x)(Type(x)) == x
 
 
 def is_compatible_with(x, Type):
-    """                                                                                                                                                                                                
-    Check if x has a type compatible with Type                                                                                                                                                         
-    :param x: object to be checked                                                                                                                                                                     
-    :param Type: target type to check x over                                                                                                                                                           
-    
+    """
+    Check if x has a type compatible with Type
+    :param x: object to be checked
+    :param Type: target type to check x over
+
     """
     if type(x) == Type:
         return True
     try:
         if float == Type or int == Type:
+            # avoid those types that can be converted to float/int but are not
+            # very meaningful and could potentially lead to errors
+            # i.e., str and bool values should not be used to initialize float/int variables
+            # avoid those types that can be converted to float/int but not very
+            # meaningful and  could potentially lead to error
+            # i.e., str and bool typed value should not be used for initializing float/int variable
             if not isinstance(x, str) and not isinstance(x, bool):
                 return convert_and_compare(x, Type)
         elif bool == Type:
-            # should not use string type to initialize bool variable                                                                                                                                   
+            # should not use string type to initialize bool variable
             if not isinstance(x, str):
                 return convert_and_compare(x, Type)
         else:
@@ -88,6 +88,10 @@ class ParameterAttribute(object):
     :type learning_rate: float or None
     :param momentum: The parameter momentum. None means use global value.
     :type momentum: float or None
+    :param gradient_clipping_threshold: gradient clipping threshold. If the
+                                        gradient value is larger than this
+                                        threshold, it will be clipped.
+    :type gradient_clipping_threshold: float
     :param sparse_update: Enable sparse update for this parameter. It will
                           enable both local and remote sparse update.
     :type sparse_update: bool
@@ -104,6 +108,7 @@ class ParameterAttribute(object):
                  l2_rate=None,
                  learning_rate=None,
                  momentum=None,
+                 gradient_clipping_threshold=None,
                  sparse_update=False):
         # initialize strategy.
         if is_static:
@@ -152,6 +157,11 @@ class ParameterAttribute(object):
             self.attr['sparse_update'] = True
             self.attr['sparse_remote_update'] = True
 
+        if gradient_clipping_threshold is not None and \
+                is_compatible_with(gradient_clipping_threshold, float):
+            self.attr['gradient_clipping_threshold'] = \
+                gradient_clipping_threshold
+
     def set_default_parameter_name(self, name):
         """
         Set default parameter name. If parameter not set, then will use default
diff --git a/python/paddle/trainer_config_helpers/math.py b/python/paddle/trainer_config_helpers/layer_math.py
similarity index 100%
rename from python/paddle/trainer_config_helpers/math.py
rename to python/paddle/trainer_config_helpers/layer_math.py