diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake
index 71e20c85276b014c2e33735c3199c3772526c6c7..ab105611c812a4f4b642ac5b1213fdfe93fab97d 100644
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -1,11 +1,11 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -29,12 +29,14 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
 ExternalProject_Add(
     glog
     ${EXTERNAL_PROJECT_LOG_ARGS}
+    DEPENDS gflags
     GIT_REPOSITORY  "https://github.com/google/glog.git"
     PREFIX          ${GLOG_SOURCES_DIR}
     UPDATE_COMMAND  ""
     CMAKE_ARGS      -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
     CMAKE_ARGS      -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-    CMAKE_ARGS      -DWITH_GFLAGS=OFF
+    CMAKE_ARGS      -DWITH_GFLAGS=ON
+    CMAKE_ARGS      -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
     CMAKE_ARGS      -DBUILD_TESTING=OFF
 )
 
diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index 613614c0e3d42fac4147f78edbc1bd6d62847419..84f459033f06f89d3b150317793c7e62274468b2 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -29,17 +29,12 @@ IF(WIN32)
         "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
   SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
 ELSE(WIN32)
-  IF(${HOST_SYSTEM} STREQUAL "centos")
-    SET(LIB "lib64")
-  ELSE()
-    SET(LIB "lib")
-  ENDIF()
   SET(PROTOBUF_LITE_LIBRARY
-        "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
+        "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
   SET(PROTOBUF_LIBRARY
-        "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
+        "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
   SET(PROTOBUF_PROTOC_LIBRARY
-        "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
+        "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
   SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
 ENDIF(WIN32)
 
@@ -58,6 +53,7 @@ ExternalProject_Add(
     -DCMAKE_POSITION_INDEPENDENT_CODE=ON
     -DCMAKE_BUILD_TYPE=Release
     -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
+    -DCMAKE_INSTALL_LIBDIR=lib
 )
 
 LIST(APPEND external_project_dependencies protobuf)
diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake
index 209e679f2cb2178423bf20dec73a0bccef199fcb..6372a9a768e580f74f837ccb6c57d4f4395eb779 100644
--- a/cmake/external/python.cmake
+++ b/cmake/external/python.cmake
@@ -26,10 +26,12 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
     find_python_module(wheel REQUIRED)
     find_python_module(google.protobuf REQUIRED)
     FIND_PACKAGE(NumPy REQUIRED)
-    IF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
+    # Test the variable by name and quote its expansion: an unquoted, empty
+    # expansion would leave IF() with dangling operators and fail to parse.
+    IF(PY_GOOGLE.PROTOBUF_VERSION AND "${PY_GOOGLE.PROTOBUF_VERSION}" VERSION_LESS "3.0.0")
         MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
-        "please use pip to upgrade protobuf.")
-    ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
+        "please use pip to upgrade protobuf. pip install -U protobuf")
+    ENDIF()
 ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
     MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
     ##################################### PYTHON ########################################
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index f5e4b3e1eb39acbe8dbcd0023956ca7e52c1ecd8..172c318b35d611d0432b78f2a18eb58a7d272b90 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -54,6 +54,7 @@ ExternalProject_Add(
     CMAKE_ARGS      -DWITH_GPU=${WITH_GPU}
     CMAKE_ARGS      -DWITH_OMP=${USE_OMP}
     CMAKE_ARGS      -DWITH_TORCH=OFF
+    CMAKE_ARGS      -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE
     CMAKE_ARGS      -DBUILD_SHARED=ON
 )
 
diff --git a/cmake/system.cmake b/cmake/system.cmake
index ab124a89dcdc1cc5b865f6a15c1693b2f72ea39a..3e472da7e0bd9c433f92f3e8b52970cd2cc6dcba 100644
--- a/cmake/system.cmake
+++ b/cmake/system.cmake
@@ -12,6 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Detects the OS and sets the appropriate variables.
+# CMAKE_SYSTEM_NAME only gives us a coarse-grained name,
+# but a finer-grained name such as centos is needed in some
+# cases to distinguish systems for customization.
+#
+# For instance, the protobuf libs path is <install_dir>/lib64
+# on CentOS, but <install_dir>/lib on other systems.
+
 IF(WIN32)
     SET(HOST_SYSTEM "win32")
 ELSE(WIN32)
@@ -30,6 +38,10 @@ ELSE(WIN32)
                 SET(HOST_SYSTEM "debian")
             ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
                 SET(HOST_SYSTEM "ubuntu")
+            ELSEIF(LINUX_ISSUE MATCHES "Red Hat")
+                SET(HOST_SYSTEM "redhat")
+            ELSEIF(LINUX_ISSUE MATCHES "Fedora")
+                SET(HOST_SYSTEM "fedora")
             ENDIF()
         ENDIF(EXISTS "/etc/issue")
 
@@ -40,6 +52,10 @@ ELSE(WIN32)
             ENDIF()
         ENDIF(EXISTS "/etc/redhat-release")
 
+        IF(NOT HOST_SYSTEM)
+            SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
+        ENDIF()
+
     ENDIF(APPLE)
 ENDIF(WIN32)
 
diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index f8c4bcac2f8eb41400659dc24ba81768e7ae3640..c541b72e104bf2b81e2ac222d4af13ea2f90d289 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -69,8 +69,14 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
 class IdentityActivation : public ActivationFunction {
 public:
   static const std::string name;
-  void forward(Argument& act) { (void)act; }
-  void backward(Argument& act) { (void)act; }
+  Error __must_check forward(Argument& act) {
+    (void)act;
+    return Error();
+  }
+  Error __must_check backward(Argument& act) {
+    (void)act;
+    return Error();
+  }
   const std::string& getName() const { return name; }
 };
 const std::string IdentityActivation::name = "";
@@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] {
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(sigmoid)
-void forward(Argument& act) { act.value->sigmoid(*act.value); }
-void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->sigmoid(*act.value);
+  return Error();
+}
+Error __must_check backward(Argument& act) {
+  act.grad->sigmoidDerivative(*act.value);
+  return Error();
+}
 END_DEFINE_ACTIVATION(sigmoid)
 
 /**
@@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_;
 MatrixPtr one_;
 
 public:
-void forward(Argument& act) { act.value->softmax(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->softmax(*act.value);
+  return Error();
+}
 
-void backward(Argument& act) {
+Error __must_check backward(Argument& act) {
   MatrixPtr outputV = act.value;
   MatrixPtr outputG = act.grad;
 
@@ -137,6 +152,7 @@ void backward(Argument& act) {
 
     act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
   }
+  return Error();
 }
 END_DEFINE_ACTIVATION(softmax)
 
@@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_;
 Argument argument_;
 
 public:
-void forward(Argument& act) {
-  CHECK_EQ(act.value->getWidth(), 1UL);
+Error __must_check forward(Argument& act) {
+  if (act.value->getWidth() != 1UL) {
+    return Error(
+        "Input width for each timestep of sequence softmax should be 1");
+  }
 
   if (!argument_.value) {
     argument_.value = Matrix::create(nullptr,
@@ -169,10 +188,14 @@ void forward(Argument& act) {
 
   auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
   act.value->sequenceSoftmax(*act.value, *starts);
+  return Error();
 }
 
-void backward(Argument& act) {
-  CHECK_EQ(act.grad->getWidth(), 1UL);
+Error __must_check backward(Argument& act) {
+  if (act.grad->getWidth() != 1UL) {  // validate the gradient, as before
+    return Error(
+        "Input width for each timestep of sequence softmax should be 1");
+  }
 
   size_t numSequences = act.getNumSequences();
   const int* starts = act.sequenceStartPositions->getData(false);
@@ -184,8 +207,10 @@ void backward(Argument& act) {
     argument_.value->setData(act.value->getData() + offset, 1UL, size);
     argument_.grad->setData(act.grad->getData() + offset, 1UL, size);
 
-    softmax_.backward(argument_);
+    Error status = softmax_.backward(argument_);
+    if (!status) return status;
   }
+  return Error();
 }
 END_DEFINE_ACTIVATION(sequence_softmax)
 
@@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax)
  *    0 otherwise.
  */
 BEGIN_DEFINE_ACTIVATION(relu)
-void forward(Argument& act) { act.value->relu(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->relu(*act.value);
+  return Error();
+}
 
-void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
+Error __must_check backward(Argument& act) {
+  act.grad->reluDerivative(*act.value);
+  return Error();
+}
 END_DEFINE_ACTIVATION(relu)
 
 /**
@@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu)
  * TODO(yuyang18): Remove magic number 24 or make it configuable.
  */
 BEGIN_DEFINE_ACTIVATION(brelu)
-void forward(Argument& act) { act.value->brelu(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->brelu(*act.value);
+  return Error();
+}
 
-void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
+Error __must_check backward(Argument& act) {
+  act.grad->breluDerivative(*act.value);
+  return Error();
+}
 END_DEFINE_ACTIVATION(brelu)
 
 /**
@@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(tanh)
-void forward(Argument& act) { act.value->tanh(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->tanh(*act.value);
+  return Error();
+}
 
-void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
+Error __must_check backward(Argument& act) {
+  act.grad->tanhDerivative(*act.value);
+  return Error();
+}
 END_DEFINE_ACTIVATION(tanh)
 
 /**
@@ -248,10 +291,14 @@ real a, b;
 
 public:
 ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
-void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); }
+Error __must_check forward(Argument& act) {
+  act.value->scaledTanh(*act.value, a, b);
+  return Error();
+}
 
-void backward(Argument& act) {
+Error __must_check backward(Argument& act) {
   act.grad->scaledTanhDerivative(*act.value, a, b);
+  return Error();
 }
 END_DEFINE_ACTIVATION(stanh)
 
@@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softrelu)
-void forward(Argument& act) { act.value->softrelu(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->softrelu(*act.value);
+  return Error();
+}
 
-void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
+Error __must_check backward(Argument& act) {
+  act.grad->softreluDerivative(*act.value);
+  return Error();
+}
 END_DEFINE_ACTIVATION(softrelu)
 
 /**
@@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu)
  *     0   if z=0
  */
 BEGIN_DEFINE_ACTIVATION(abs)
-void forward(Argument& act) {
+Error __must_check forward(Argument& act) {
   SetDevice device(act.deviceId);
   Matrix::resizeOrCreate(act.in,
                          act.value->getHeight(),
@@ -290,9 +343,13 @@ void forward(Argument& act) {
 
   act.in->copyFrom(*act.value);
   act.value->abs2(*act.value);
+  return Error();
 }
 
-void backward(Argument& act) { act.grad->absDerivative(*act.in); }
+Error __must_check backward(Argument& act) {
+  act.grad->absDerivative(*act.in);
+  return Error();
+}
 END_DEFINE_ACTIVATION(abs)
 
 /**
@@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(square)
-void forward(Argument& act) {
+Error __must_check forward(Argument& act) {
   SetDevice device(act.deviceId);
   Matrix::resizeOrCreate(act.in,
                          act.value->getHeight(),
@@ -312,9 +369,13 @@ void forward(Argument& act) {
 
   act.in->copyFrom(*act.value);
   act.value->square2(*act.value);
+  return Error();
 }
 
-void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
+Error __must_check backward(Argument& act) {
+  act.grad->squareDerivative(*act.in);
+  return Error();
+}
 END_DEFINE_ACTIVATION(square)
 
 /**
@@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(exponential)
-void forward(Argument& act) { act.value->exp2(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->exp2(*act.value);
+  return Error();
+}
 
-void backward(Argument& act) { act.grad->expDerivative(*act.value); }
+Error __must_check backward(Argument& act) {
+  act.grad->expDerivative(*act.value);
+  return Error();
+}
 END_DEFINE_ACTIVATION(exponential)
 
 /**
@@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(log)
-void forward(Argument& act) {
+Error __must_check forward(Argument& act) {
   SetDevice device(act.deviceId);
   Matrix::resizeOrCreate(act.in,
                          act.value->getHeight(),
@@ -346,9 +413,13 @@ void forward(Argument& act) {
 
   act.in->copyFrom(*act.value);
   act.value->log2(*act.value);
+  return Error();
 }
 
-void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); }
+Error __must_check backward(Argument& act) {
+  act.grad->dotDiv(*act.grad, *act.in);
+  return Error();
+}
 END_DEFINE_ACTIVATION(log)
 
 ActivationFunction* ActivationFunction::create(const std::string& type) {
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index 601e3b6c0cd401ec007e8cf51e44416f82832e58..f208224e304a79125679c6f3a5c0be09552465ef 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include <string>
 #include <vector>
+#include "paddle/utils/Error.h"
 
 namespace paddle {
 
@@ -48,7 +49,7 @@ public:
    *
    * Usually, act is Layer::output_
    */
-  virtual void forward(Argument& act) = 0;
+  virtual Error __must_check forward(Argument& act) = 0;
 
   /**
    * @brief Backward propagaion
@@ -57,7 +58,7 @@ public:
    * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
    * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
    */
-  virtual void backward(Argument& act) = 0;
+  virtual Error __must_check backward(Argument& act) = 0;
 
   virtual const std::string& getName() const = 0;
 };
diff --git a/paddle/gserver/layers/GatedRecurrentLayer.cpp b/paddle/gserver/layers/GatedRecurrentLayer.cpp
index 930d9a056164e7c677adb53b7b67901364da1309..d3aeea921801da301b2829736059130aec14cef6 100644
--- a/paddle/gserver/layers/GatedRecurrentLayer.cpp
+++ b/paddle/gserver/layers/GatedRecurrentLayer.cpp
@@ -314,13 +314,13 @@ void GatedRecurrentLayer::forwardBatch(int batchSize,
 
   batchValue_->resizeOrCreate(*output_.value);
   batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */ true);
-  if (bias_ && bias_->getWGrad()) {
+  if (bias_) {
     gate_.value->addBias(*(bias_->getW()), 1);
   }
 
   {
     int numBatch = batchValue_->getNumBatch();
-    int batchSize = 0;
+    int curBatchSize = 0;
     AsyncGpuBlock asyncGpuBlock;
     for (int n = 0; n < numBatch; n++) {
       MatrixPtr outputValueTmp = batchValue_->getBatchValue(n);
@@ -330,16 +330,17 @@ void GatedRecurrentLayer::forwardBatch(int batchSize,
       gruValue.resetOutputValue =
           (batchValue_->getBatchValue(*resetOutput_.value, n))->getData();
 
-      batchSize = outputValueTmp->getHeight();
+      curBatchSize = outputValueTmp->getHeight();
       gruValue.prevOutValue =
-          (n == 0 ? nullptr
-                  : (batchValue_->getBatchValue(n - 1, batchSize))->getData());
+          (n == 0
+               ? nullptr
+               : (batchValue_->getBatchValue(n - 1, curBatchSize))->getData());
 
       {
         if (useGpu_) {
-          GruCompute::forward<1>(gruValue, getSize(), batchSize);
+          GruCompute::forward<1>(gruValue, getSize(), curBatchSize);
         } else {
-          GruCompute::forward<0>(gruValue, getSize(), batchSize);
+          GruCompute::forward<0>(gruValue, getSize(), curBatchSize);
         }
       }
     }
diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp
index c47943f81c01589eada4b825d54be5c69314b6fa..f76d41ad3e8a3b1730f9d50c0773ee4f61ddb541 100644
--- a/paddle/gserver/layers/Layer.cpp
+++ b/paddle/gserver/layers/Layer.cpp
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/utils/Util.h"
 
 #include "paddle/math/SparseMatrix.h"
+#include "paddle/utils/Error.h"
 #include "paddle/utils/Logging.h"
 
 #include "AddtoLayer.h"
@@ -334,7 +335,8 @@ void Layer::showOutputStats() {
 
 void Layer::forwardActivation() {
   /* activation */
-  activation_->forward(output_);
+  auto status = activation_->forward(output_);
+  status.check();
 
   /* dropout */
   if (config_.drop_rate() > 0) {
@@ -372,7 +374,8 @@ void Layer::backwardActivation() {
     oGrad->dotMul(*oGrad, *dropOutMask_);
   }
 
-  activation_->backward(output_);
+  auto status = activation_->backward(output_);
+  status.check();
 }
 
 void Layer::forwardDropOut() {
diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp
index fb41af563195496a57eafcc52b49eadac697fa0a..88d934d782b549a984f1d7798e54bcc4436ea0cf 100644
--- a/paddle/gserver/layers/MDLstmLayer.cpp
+++ b/paddle/gserver/layers/MDLstmLayer.cpp
@@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
           *frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0);
     }
   }
-  activationGate_->forward(frameInputGate_[idxCurr]);
-  activationGate_->forward(frameForgetGate_[idxCurr]);
-  activation_->forward(frameInputNode_[idxCurr]);
+  auto status = activationGate_->forward(frameInputGate_[idxCurr]);
+  status.check();
+  status = activationGate_->forward(frameForgetGate_[idxCurr]);
+  status.check();
+  status = activation_->forward(frameInputNode_[idxCurr]);
+  status.check();
 
   frameState_[idxCurr].value->zeroMem();
   for (int i = 0; i < numDims_; i++) {
@@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
 
   frameOutputGate_[idxCurr].value->addDotMul(
       *frameState_[idxCurr].value, *checkOg_, 1.0, 1.0);
-  activationGate_->forward(frameOutputGate_[idxCurr]);
+  status = activationGate_->forward(frameOutputGate_[idxCurr]);
+  status.check();
 
   framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value));
-  activationState_->forward(framePreOutput_[idxCurr]);
+  status = activationState_->forward(framePreOutput_[idxCurr]);
+  status.check();
 
   frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value,
                                       *frameOutputGate_[idxCurr].value);
@@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
 
   framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
                                         *frameOutputGate_[idxCurr].value);
-  activationState_->backward(framePreOutput_[idxCurr]);
+  activationState_->backward(framePreOutput_[idxCurr]).check();
   frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad));
 
   frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
                                          *framePreOutput_[idxCurr].value);
-  activationGate_->backward(frameOutputGate_[idxCurr]);
+  activationGate_->backward(frameOutputGate_[idxCurr]).check();
 
   frameState_[idxCurr].grad->addDotMul(
       *frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0);
@@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
     }
   }
 
-  activationGate_->backward(frameInputGate_[idxCurr]);
-  activationGate_->backward(frameForgetGate_[idxCurr]);
-  activation_->backward(frameInputNode_[idxCurr]);
+  activationGate_->backward(frameInputGate_[idxCurr]).check();
+  activationGate_->backward(frameForgetGate_[idxCurr]).check();
+  activation_->backward(frameInputNode_[idxCurr]).check();
 
   if (bias_->getWGrad()) {
     for (int i = 0; i < numDims_; i++) {
diff --git a/paddle/gserver/layers/NCELayer.cpp b/paddle/gserver/layers/NCELayer.cpp
index 5ab765247f63dfe6e6651ca4d27dc7183a9f33e1..3542e739df8d03470bf2c455b4f3492a7f9e973a 100644
--- a/paddle/gserver/layers/NCELayer.cpp
+++ b/paddle/gserver/layers/NCELayer.cpp
@@ -193,7 +193,8 @@ public:
       forwardOneInput(l);
     }
 
-    activation_->forward(sampleOut_);
+    auto status = activation_->forward(sampleOut_);
+    status.check();
 
     forwardCost();
   }
@@ -207,7 +208,8 @@ public:
 
     backwardCost();
 
-    activation_->backward(sampleOut_);
+    auto status = activation_->backward(sampleOut_);
+    status.check();
 
     if (biases_->getWGrad()) {
       backwardBias(callback);
diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp
index 55e0fdfb9048c02b2dcd474c6887eee180328260..b843fa1265cf3c0ad0814fb90f69e245ee5ab4ad 100644
--- a/paddle/gserver/layers/RecurrentLayer.cpp
+++ b/paddle/gserver/layers/RecurrentLayer.cpp
@@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
     if (prevOutput_) {
       frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
     }
-    activation_->forward(frameOutput_[start]);
+    activation_->forward(frameOutput_[start]).check();
+
     for (int i = 1; i < length; ++i) {
       frameOutput_[start + i].value->mul(
           *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
-      activation_->forward(frameOutput_[start + i]);
+      activation_->forward(frameOutput_[start + i]).check();
     }
     if (prevOutput_) {
       prevOutput_->assign(*frameOutput_[start + length - 1].value);
     }
   } else {
-    activation_->forward(frameOutput_[start + length - 1]);
+    activation_->forward(frameOutput_[start + length - 1]).check();
     for (int i = length - 2; i >= 0; --i) {
       frameOutput_[start + i].value->mul(
           *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
-      activation_->forward(frameOutput_[start + i]);
+      activation_->forward(frameOutput_[start + i]).check();
     }
   }
 }
@@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
   MatrixPtr weightT = weight_->getW()->getTranspose();
   if (!reversed_) {
     for (int i = length - 1; i > 0; --i) {
-      activation_->backward(frameOutput_[start + i]);
+      activation_->backward(frameOutput_[start + i]).check();
       frameOutput_[start + i - 1].grad->mul(
           *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
-    activation_->backward(frameOutput_[start]);
+    activation_->backward(frameOutput_[start]).check();
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
           *output_.value->subMatrix(start, length - 1)->getTranspose(),
@@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
     }
   } else {
     for (int i = 0; i < length - 1; ++i) {
-      activation_->backward(frameOutput_[start + i]);
+      activation_->backward(frameOutput_[start + i]).check();
       frameOutput_[start + i + 1].grad->mul(
           *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
-    activation_->backward(frameOutput_[start + length - 1]);
+    activation_->backward(frameOutput_[start + length - 1]).check();
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
           *output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
@@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
       }
       Argument arg;
       arg.value = batch2;
-      activation_->forward(arg);
+      activation_->forward(arg).check();
     }
   }
   batchValue_->copyBackSeq(*output_.value);
@@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize,
       Argument arg;
       arg.value = batch1;
       arg.grad = batch2;
-      activation_->backward(arg);
+      activation_->backward(arg).check();
 
       if (n != 0) {
         batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp
index 5eacff6b7143996130bea64766ef42c66f4c7310..d9a91de8a6f4daf514f089a3d63cb519223bfdd0 100644
--- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp
+++ b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp
@@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) {
                                nnz,
                                /*trans=*/false,
                                /*useGpu=*/useGpu_);
-    activation_->forward(arg);
+    //! TODO(yuyang18): Why can't we invoke forwardActivation here?
+    activation_->forward(arg).check();
   } else /* train and test in train, not generating */ {
     // during training, this layer output value is *Matrix*, which is input of
     // eg. multi-class-cross-entropy
diff --git a/paddle/gserver/tests/test_WarpCTCLayer.cpp b/paddle/gserver/tests/test_WarpCTCLayer.cpp
index 23ae95852e84216c9065f1b123d35ce868fbb90f..55427e2f12fd7b77c6eea1f65b3229e6fd29d71d 100644
--- a/paddle/gserver/tests/test_WarpCTCLayer.cpp
+++ b/paddle/gserver/tests/test_WarpCTCLayer.cpp
@@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name,
 
   ActivationFunction* softmaxActivation = ActivationFunction::create("softmax");
 
-  softmaxActivation->forward(dataLayer->getOutput());
+  softmaxActivation->forward(dataLayer->getOutput()).check();
   layer->forward(PASS_GC);
 
   layer->backward();
-  softmaxActivation->backward(dataLayer->getOutput());
+  softmaxActivation->backward(dataLayer->getOutput()).check();
 
   return layer;
 }
diff --git a/paddle/utils/Compiler.h b/paddle/utils/Compiler.h
new file mode 100644
index 0000000000000000000000000000000000000000..cebca5a2a3766110b83231eb0705e48800a7bda6
--- /dev/null
+++ b/paddle/utils/Compiler.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+/**
+ * This header defines some useful attributes for each compiler. It is an
+ * abstraction layer over compilers.
+ */
+#ifdef __GNUC__
+#define GCC_VERSION \
+  (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#else
+#define GCC_VERSION 0  // non-GCC: keeps `#if GCC_VERSION >= N` well-formed
+#endif
+
+/**
+ * __must_check macro. It requires a function's return value to be used;
+ * otherwise the compiler raises a warning. Paddle also treats all compile
+ * warnings as errors.
+ */
+#if GCC_VERSION >= 30400
+#define __must_check __attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h
new file mode 100644
index 0000000000000000000000000000000000000000..2b4fbef4e015e7c6895745f220bd444f3883c121
--- /dev/null
+++ b/paddle/utils/Error.h
@@ -0,0 +1,130 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <glog/logging.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <memory>
+#include <string>
+#include "Compiler.h"
+
+namespace paddle {
+
+/**
+ * Error is Paddle's error code. It only contains a std::string error message.
+ *
+ *
+ * There are two styles of returning errors in Paddle.
+ *
+ * 1. Return Error
+ *    A method that returns an Error must use the `__must_check` attribute.
+ *    Example as below.
+ * @code{cpp}
+ * Error __must_check foo();
+ *
+ * Error __must_check bar() {
+ *   // do something.
+ *   Error s = foo();  // invoke another method that returns an Error.
+ *   if (!s) return s;
+ *   // do something else.
+ *   return Error();
+ * }
+ * @endcode{cpp}
+ *
+ * 2. Return by parameter.
+ *    It is another way to return an error, by using a pointer parameter.
+ *    Example as below.
+ *
+ * @code{cpp}
+ * Error bar();
+ *
+ * int foo(Error* error) {
+ *   // Do something.
+ *   Error s = bar();
+ *   if (!s) {
+ *     *error = s;
+ *     return 0;
+ *   }
+ *   // Do something else.
+ *   if (someInternalErrorHappend) {
+ *     *error = Error("Some dimension is too large, %d", dimension);
+ *     return 0;
+ *   }
+ *   // End of method.
+ *   return someValue;
+ * }
+ *
+ * Error foobar() {
+ *   Error s;
+ *   // do something.
+ *   foo(&s);
+ *   if (!s) return s;
+ * }
+ * @endcode{cpp}
+ *
+ *
+ * Currently there is a helper method 'check' in Error, because Paddle used to
+ * always use log(FATAL) or CHECK to make the program exit. Once all log(FATAL)
+ * and CHECK calls in Paddle are cleaned up, the 'check' method will be removed.
+ */
+class Error {
+public:
+  /**
+   * Construct a no-error value.
+   */
+  Error() {}
+
+  /**
+   * @brief Create an Error using printf syntax.
+   */
+  explicit Error(const char* fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    constexpr size_t kBufferSize = 1024;
+    char buffer[kBufferSize];
+    vsnprintf(buffer, kBufferSize, fmt, ap);
+    this->msg_.reset(new std::string(buffer));
+    va_end(ap);
+  }
+
+  /**
+   * @brief msg will return the error message. If no error, return nullptr.
+   */
+  const char* msg() const {
+    if (msg_) {
+      return msg_->c_str();
+    } else {
+      return nullptr;
+    }
+  }
+
+  /**
+   * @brief operator bool, return True if there is no error.
+   */
+  operator bool() const { return msg_ == nullptr; }
+
+  /**
+   * @brief check this status by glog.
+   * @note It is a temp method used during cleaning Paddle code. It will be
+   *       removed later.
+   */
+  void check() const { CHECK(*this) << msg(); }
+
+private:
+  std::shared_ptr<std::string> msg_;
+};
+
+}  // namespace paddle
diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp
index 411a64aa8d0737a8d57e62fbd0788ffaacfbc9f7..220aac1ff11e0ff263df8459f539237944b94c81 100644
--- a/paddle/utils/Util.cpp
+++ b/paddle/utils/Util.cpp
@@ -144,20 +144,20 @@ void runInitFunctions() {
 }
 
 void initMain(int argc, char** argv) {
-  initializeLogging(argc, argv);
   installLayerStackTracer();
   std::string line;
   for (int i = 0; i < argc; ++i) {
     line += argv[i];
     line += ' ';
   }
-  LOG(INFO) << "commandline: " << line;
 
 #ifndef GFLAGS_GFLAGS_H_
   namespace gflags = google;
 #endif
 
   gflags::ParseCommandLineFlags(&argc, &argv, true);
+  initializeLogging(argc, argv);
+  LOG(INFO) << "commandline: " << line;
   CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1];
 
   installProfilerSwitch();
diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt
index 26fafbd1ab3f2967b765b8bcb973fb745c0e6422..aa923b355377752f9b297a125f5c43c364ba9b06 100644
--- a/paddle/utils/tests/CMakeLists.txt
+++ b/paddle/utils/tests/CMakeLists.txt
@@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace)
 add_simple_unittest(test_ThreadBarrier)
 add_simple_unittest(test_SpinLock)
 add_simple_unittest(test_SIMDFlags)
+add_simple_unittest(test_Error)
 
 add_executable(
     test_CustomStackTracePrint
diff --git a/paddle/utils/tests/test_Error.cpp b/paddle/utils/tests/test_Error.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..85156466e2cafd36d49941836c066a542dbbd60e
--- /dev/null
+++ b/paddle/utils/tests/test_Error.cpp
@@ -0,0 +1,34 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/utils/Error.h"
+
+#include <gtest/gtest.h>
+
+TEST(Error, testAll) {
+  paddle::Error error;
+  ASSERT_TRUE(error);
+  error = paddle::Error("I'm the error");
+  ASSERT_FALSE(error);
+  ASSERT_STREQ("I'm the error", error.msg());
+
+  error = paddle::Error("error2");
+  ASSERT_FALSE(error);
+  ASSERT_STREQ("error2", error.msg());
+
+  int i = 3;
+  auto error3 = paddle::Error("error%d", i);
+  ASSERT_FALSE(error3);
+  ASSERT_STREQ("error3", error3.msg());
+}