diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index 78e958e06fac84fa956abc9faea60157bf6132eb..8b7b2e9b65898950e036ebc023cd28990cef303f 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -22,9 +22,12 @@ limitations under the License. */
 #include <type_traits>
 #include "paddle/parameter/Argument.h"
 #include "paddle/utils/ClassRegistrar.h"
-
 #include "paddle/utils/Logging.h"
 
+#ifdef PADDLE_USE_MKLDNN
+#include "MKLDNNActivation.h"
+#endif
+
 namespace paddle {
 
 static ClassRegistrar<ActivationFunction> gActivationRegistrar;
@@ -456,6 +459,12 @@ Error __must_check backward(Argument& act) {
 END_DEFINE_ACTIVATION(log)
 
 ActivationFunction* ActivationFunction::create(const std::string& type) {
+#ifdef PADDLE_USE_MKLDNN
+  if (!type.empty() && type.compare(0, 7, "mkldnn_") == 0) {
+    return MKLDNNActivation::create(type);
+  }
+#endif
+
   return gActivationRegistrar.createByType(type);
 }
 
diff --git a/paddle/gserver/activations/MKLDNNActivation.cpp b/paddle/gserver/activations/MKLDNNActivation.cpp
index 7fa5a4587c471d5d07b1bf7d8f9b5c6c73e84da8..ac50937ef3e28c1ac5aae651f9cf266ad07abcc4 100644
--- a/paddle/gserver/activations/MKLDNNActivation.cpp
+++ b/paddle/gserver/activations/MKLDNNActivation.cpp
@@ -29,24 +29,27 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
 /**
  * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION
  */
-#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA)            \
-  class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)                       \
-      : public MKLDNNEltwiseActivation {                             \
-  private:                                                           \
-    static const std::string name;                                   \
-    static const float alpha;                                        \
-                                                                     \
-  public:                                                            \
-    const std::string& getName() const { return name; }              \
-    float getAlpha() const { return alpha; }                         \
-  };                                                                 \
-  const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name =   \
-      "mkldnn_" #ACT_TYPE;                                           \
-  const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \
-  static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] {       \
-    gMKLDNNActivationRegistrar                                       \
-        .registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>(      \
-            "mkldnn_" #ACT_TYPE);                                    \
+#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA)        \
+  class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)                              \
+      : public MKLDNNEltwiseActivation {                                    \
+  private:                                                                  \
+    static const std::string name;                                          \
+    static const float alpha;                                               \
+    static const float bwdAlpha;                                            \
+                                                                            \
+  public:                                                                   \
+    const std::string& getName() const { return name; }                     \
+    float getAlpha() const { return alpha; }                                \
+    float getBwdAlpha() const { return bwdAlpha; }                          \
+  };                                                                        \
+  const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name =          \
+      "mkldnn_" #ACT_TYPE;                                                  \
+  const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA;        \
+  const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \
+  static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] {              \
+    gMKLDNNActivationRegistrar                                              \
+        .registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>(             \
+            "mkldnn_" #ACT_TYPE);                                           \
 });
 
@@ -54,21 +57,21 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
 /**
  * @brief MKLDNN Relu Activation.
  * Actually mkldnn_relu is Leaky Relu.
  *  f(x) = x                   (x >= 0)
  *  f(x) = negative_slope * x  (x <  0)
- * @note the negative_slope should be -0.f
+ * @note the negative_slope should be -0.f in forward
  */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f)
 
 /**
  * @brief MKLDNN Tanh Activation.
  */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f)
 
 /**
  * @brief MKLDNN ELU(Exponential Linear Unit) Activation.
  *  f(x) = x                              (x >= 0)
  *  f(x) = negative_slope * (exp(x) - 1)  (x <  0)
  */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f)
 
 ActivationFunction* MKLDNNActivation::create(const std::string& type) {
   return gMKLDNNActivationRegistrar.createByType(type);
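Note: the relu definition above passes ALPHA = -0.f forward and BWD_ALPHA = 0.f backward. Below is a minimal scalar sketch of why those two values differ, assuming eltwise_relu follows the leaky-relu formulas quoted in the comments; one plausible reading of the "avoid negative value" remark is IEEE signed zero, shown here. This is plain C++ illustration, not the MKL-DNN API and not part of the patch.

// Scalar sketch of the (ALPHA, BWD_ALPHA) pair chosen for relu above.
// Assumed semantics (from the quoted comments):
//   fwd: f(x)  = x >= 0 ? x : alpha * x
//   bwd: df/dx = x >= 0 ? 1 : alpha
#include <cstdio>

static float fwd(float x, float alpha) { return x >= 0 ? x : alpha * x; }
static float bwd(float x, float topDiff, float alpha) {
  return topDiff * (x >= 0 ? 1.f : alpha);
}

int main() {
  float x = -2.f;
  // forward alpha = -0.f: (-0.f) * (-2.f) == +0, negatives map to a plain zero
  std::printf("fwd(-2, -0.f) = %g\n", fwd(x, -0.f));  // prints 0
  // whereas a forward alpha of +0.f would produce a negative zero
  std::printf("fwd(-2,  0.f) = %g\n", fwd(x, 0.f));   // prints -0
  // backward alpha = 0.f: the gradient for x < 0 is an ordinary +0
  std::printf("bwd(-2, 1, 0.f) = %g\n", bwd(x, 1.f, 0.f));
  return 0;
}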
diff --git a/paddle/gserver/activations/MKLDNNActivation.h b/paddle/gserver/activations/MKLDNNActivation.h
index 3afab609bec94b03a4d6be445f0dfee09a9ab3f0..bda9bbebe5600dbe26d11ff32058f7b2647b763e 100644
--- a/paddle/gserver/activations/MKLDNNActivation.h
+++ b/paddle/gserver/activations/MKLDNNActivation.h
@@ -30,6 +30,9 @@ class MKLDNNActivation : public ActivationFunction {
 protected:
   // input value element count
   size_t cnt_;
+  // do not merge resetBwd into resetFwd:
+  // the grad data would change before the backward pass
+  bool needResetBwd_;
   // mkldnn matrix, primitive, stream and pipeline
   MKLDNNMatrixPtr val_;
   MKLDNNMatrixPtr grad_;
@@ -40,7 +43,7 @@ protected:
   std::vector<mkldnn::primitive> pipelineBwd_;
 
 public:
-  MKLDNNActivation() : cnt_(0) {}
+  MKLDNNActivation() : cnt_(0), needResetBwd_(true) {}
   ~MKLDNNActivation() {}
   static ActivationFunction* create(const std::string& type);
   static std::vector<std::string> getAllRegisteredTypes();
@@ -57,19 +60,43 @@ class MKLDNNEltwiseActivation : public MKLDNNActivation {
   typedef mkldnn::eltwise_forward eltwise_fwd;
   typedef mkldnn::eltwise_backward eltwise_bwd;
 
+protected:
+  // save the forward primitive desc, which can be reused in backward
+  std::shared_ptr<eltwise_fwd::primitive_desc> fwdPD_;
+  // eltwise_bwd needs the src input value
+  MKLDNNMatrixPtr inVal_;
+  // used to copy the input value
+  std::shared_ptr<mkldnn::reorder> copyInVal_;
+
 public:
   MKLDNNEltwiseActivation() {}
   ~MKLDNNEltwiseActivation() {}
   virtual const std::string& getName() const = 0;
+
+  // in general, the alpha of forward and backward should be equal;
+  // but for relu, to avoid negative values, they should be opposite
   virtual float getAlpha() const = 0;
+  virtual float getBwdAlpha() const = 0;
   virtual float getBeta() const { return 0.f; }
+  virtual mkldnn::algorithm getAlgo(const std::string& type) const {
+    if (type == "mkldnn_relu") {
+      return mkldnn::algorithm::eltwise_relu;
+    } else if (type == "mkldnn_tanh") {
+      return mkldnn::algorithm::eltwise_tanh;
+    } else if (type == "mkldnn_elu") {
+      return mkldnn::algorithm::eltwise_elu;
+    } else {
+      LOG(FATAL) << "Unknown eltwise activation type: " << type;
+    }
+    return (mkldnn::algorithm)0;
+  }
+
   /**
-   * reshape and reset the forward and backward primitives
+   * reshape and reset the forward primitives
    */
-  void resetPrimitives(Argument& act) {
+  void resetFwd(Argument& act) {
     if (cnt_ == act.value->getElementCnt()) {
       return;
     }
@@ -78,21 +105,13 @@ public:
     auto eng = CPUEngine::Instance().getEngine();
 
     // get algo setting
-    mkldnn::algorithm algo;
-    if (this->getName() == "mkldnn_relu") {
-      algo = mkldnn::algorithm::eltwise_relu;
-    } else if (this->getName() == "mkldnn_tanh") {
-      algo = mkldnn::algorithm::eltwise_tanh;
-    } else if (this->getName() == "mkldnn_elu") {
-      algo = mkldnn::algorithm::eltwise_elu;
-    } else {
-      LOG(FATAL) << "Unkown eltwise activation type: " << this->getName();
-    }
+    mkldnn::algorithm algo = getAlgo(this->getName());
     // note: alpha represents the NegativeSlope when used in relu.
     float alpha = getAlpha();
     float beta = getBeta();
 
     /// forward
+    pipelineFwd_.clear();
     val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
     if (val_ == nullptr) {
       int bs = act.getBatchSize();
@@ -109,33 +128,52 @@ public:
                                      val_->getMemoryDesc(),
                                      alpha,
                                      beta);
-    auto fwdPD = eltwise_fwd::primitive_desc(fwdDesc, eng);
-    // inplace buffer, dst = src
-    fwd_.reset(new eltwise_fwd(fwdPD, *val_, *val_));
-    pipelineFwd_.clear();
+    fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, eng));
+    // forward runs inplace, so save the input value before submitting
+    inVal_ = val_;
+    if (act.grad) {
+      // only copy when backward is needed
+      inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
+      copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
+      CHECK(copyInVal_) << "should not be empty";
+      pipelineFwd_.push_back(*copyInVal_);
+    }
+    fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_));
     pipelineFwd_.push_back(*fwd_);
+    needResetBwd_ = true;
+  }
 
-    /// backward
-    if (act.grad == nullptr) {
-      grad_ = nullptr;
+  /**
+   * reset the backward primitives; cannot be merged into resetFwd, as the
+   * grad data would change before the backward pass
+   */
+  void resetBwd(Argument& act) {
+    if (!needResetBwd_) {
       return;
     }
+    needResetBwd_ = false;
+    mkldnn::algorithm algo = getAlgo(this->getName());
+    float alpha = getBwdAlpha();
+    float beta = getBeta();
     grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
+    auto eng = CPUEngine::Instance().getEngine();
     auto bwdDesc = eltwise_bwd::desc(
         algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
-    auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, fwdPD);
-    bwd_.reset(new eltwise_bwd(bwdPD, *val_, *grad_, *grad_));
+    auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_);
+    CHECK(inVal_);
+    bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_));
     pipelineBwd_.clear();
     pipelineBwd_.push_back(*bwd_);
   }
 
   Error __must_check forward(Argument& act) {
-    resetPrimitives(act);
+    resetFwd(act);
     stream_->submit(pipelineFwd_);
     return Error();
   }
 
   Error __must_check backward(Argument& act) {
+    resetBwd(act);
     stream_->submit(pipelineBwd_);
     return Error();
   }
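Note: the needResetBwd_ flag above makes the backward setup lazy: resetFwd rebuilds the forward pipeline only when the element count changes and re-arms the flag, and resetBwd then runs once per forward reset, after act.grad is finally bound. A standalone toy model of that handshake follows (plain C++ with prints standing in for paddle and MKL-DNN types; not part of the patch):

// Toy model of the resetFwd/resetBwd handshake (no paddle or MKL-DNN types).
#include <cstddef>
#include <cstdio>

struct EltwiseAct {
  std::size_t cnt_ = 0;
  bool needResetBwd_ = true;

  void forward(std::size_t elemCnt) {
    if (cnt_ != elemCnt) {     // resetFwd: shape changed, rebuild fwd pipeline
      cnt_ = elemCnt;
      needResetBwd_ = true;    // bwd primitives must be rebuilt later,
      std::puts("resetFwd");   // once the grad buffer is actually valid
    }
    std::puts("submit fwd");
  }
  void backward() {
    if (needResetBwd_) {       // resetBwd: runs at most once per fwd reset
      needResetBwd_ = false;
      std::puts("resetBwd");
    }
    std::puts("submit bwd");
  }
};

int main() {
  EltwiseAct act;
  act.forward(1024); act.backward();  // first pass: resetFwd + resetBwd
  act.forward(1024); act.backward();  // steady state: submits only
  act.forward(2048); act.backward();  // size change: both resets fire again
  return 0;
}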
diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp
index 2f48e5b2d3ffc9337ed1314f6db6549e56263fdd..f59618be9d09d146be52fb51cae84f4d24c15ef1 100644
--- a/paddle/gserver/tests/MKLDNNTester.cpp
+++ b/paddle/gserver/tests/MKLDNNTester.cpp
@@ -64,15 +64,17 @@ void MKLDNNTester::reset(const TestConfig& dnn,
         configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i]));
   }
   refLayer_ = testLayers_[REF];
-  dnnLayer_ = std::dynamic_pointer_cast<MKLDNNLayer>(testLayers_[DNN]);
-  CHECK(dnnLayer_);
-  // for comparison with Paddle reference results,
-  // need manually add cpu device output for test
-  dnnLayer_->addOutputArgument(CPU_DEVICE);
+  dnnLayer_ = testLayers_[DNN];
   EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size());
   EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
-
   setInputImgSize();
+
+  // for comparison with Paddle reference results,
+  // we need to manually add a cpu device output for the test
+  MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
+  if (dnnLayer) {
+    dnnLayer->addOutputArgument(CPU_DEVICE);
+  }
 }
 
 void MKLDNNTester::setInputImgSize() {
@@ -122,7 +124,7 @@ void MKLDNNTester::randomTopDiffs() {
 void MKLDNNTester::checkForward() {
   VLOG(MKLDNN_ALL) << "Check Forward";
   printTopDatas();
-  double delta = compareMatrix(dnnLayer_->getOutput(-1).value,
+  double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value,
                                refLayer_->getOutputValue());
   EXPECT_LE(fabs(delta), eps_);
 }
@@ -155,7 +157,10 @@ void MKLDNNTester::checkBackwardWgts() {
   vector<VectorPtr> dnnWgts;  // used to temply save mkldnn weights
   saveWgt(parameters_[DNN], dnnWgts);
 
-  dnnLayer_->convertWeightsToPaddle();
+  MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
+  if (dnnLayer) {
+    dnnLayer->convertWeightsToPaddle();
+  }
   for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
     const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
     const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
@@ -322,6 +327,10 @@ void MKLDNNTester::runOnce() {
   // and clearTopDatas(REF) should be coverd by ref layers
   clearBotDiffs(REF);
   clearWgtDiffs(REF);
+  // it is necessary to clear the bottom diffs when only the activation is dnn type
+  if (configs_[DNN].layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) {
+    clearBotDiffs(DNN);
+  }
 }
 
 void MKLDNNTester::run(const TestConfig& dnn,
@@ -333,8 +342,19 @@ void MKLDNNTester::run(const TestConfig& dnn,
                        float epsilon,
                        bool log,
                        int level) {
-  VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type()
-                     << " vs " << ref.layerConfig.type();
+  CHECK(dnn.layerConfig.type().compare(0, 7, "mkldnn_") == 0 ||
+        dnn.layerConfig.active_type().compare(0, 7, "mkldnn_") == 0)
+      << "should be MKLDNN layer or MKLDNN activation";
+  if (dnn.layerConfig.type() == ref.layerConfig.type()) {
+    VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: "
+                       << dnn.layerConfig.active_type() << " vs "
+                       << ref.layerConfig.active_type();
+  } else {
+    VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: "
+                       << dnn.layerConfig.type() << " vs "
+                       << ref.layerConfig.type();
+  }
+
   ih_ = inputImgH;
   iw_ = inputImgW;
   iter_ = iter;
diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h
index 5ac885638cde7693a0c847733e7a6149c1b7e6c2..171d176ee757f1164c38d86273bdf9e5aefeda06 100644
--- a/paddle/gserver/tests/MKLDNNTester.h
+++ b/paddle/gserver/tests/MKLDNNTester.h
@@ -41,8 +41,7 @@ protected:
   vector<LayerMap> layerMaps_;
   vector<vector<ParameterPtr>> parameters_;
   vector<LayerPtr> testLayers_;
-  LayerPtr refLayer_;
-  MKLDNNLayerPtr dnnLayer_;
+  LayerPtr refLayer_, dnnLayer_;
 
   /// run some iterations, all the result should pass
   size_t iter_;
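Note: the tester changes above all follow one pattern: dnnLayer_ is now a plain LayerPtr, and MKLDNN-specific calls happen only after a successful dynamic_pointer_cast, so a stock paddle layer whose active_type is mkldnn_* can run through the same tester. A self-contained sketch of that pattern follows (Layer and MKLDNNLayer here are toy stand-ins, not paddle's classes; not part of the patch):

// Downcast-only-where-needed pattern used by MKLDNNTester after this change.
#include <iostream>
#include <memory>

struct Layer { virtual ~Layer() = default; };
struct MKLDNNLayer : Layer {
  void convertWeightsToPaddle() { std::cout << "convert\n"; }
};
using LayerPtr = std::shared_ptr<Layer>;

void checkBackwardWgts(const LayerPtr& dnnLayer_) {
  // only MKLDNN layers store weights in a different format
  auto dnnLayer = std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
  if (dnnLayer) {
    dnnLayer->convertWeightsToPaddle();
  }  // otherwise: activation-only test, weights already in paddle format
}

int main() {
  checkBackwardWgts(std::make_shared<MKLDNNLayer>());  // prints "convert"
  checkBackwardWgts(std::make_shared<Layer>());        // no-op
  return 0;
}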
diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp
index 7620365efa345dbd78605223994a425b6582c259..406181370faf90d29167b62173ce4c8af44d243e 100644
--- a/paddle/gserver/tests/test_MKLDNN.cpp
+++ b/paddle/gserver/tests/test_MKLDNN.cpp
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include "MKLDNNTester.h"
 #include "ModelConfig.pb.h"
+#include "paddle/gserver/activations/MKLDNNActivation.h"
 #include "paddle/math/MathUtils.h"
 
 using namespace paddle;  // NOLINT
@@ -190,7 +191,7 @@ void testPoolLayer(const testPoolDesc& pm) {
   }
 }
 
-TEST(MkldnnLayer, PoolLayer) {
+TEST(MKLDNNLayer, PoolLayer) {
   /* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw*/
   testPoolLayer({2, 1, 4, 4, 2, 2, 3, 3, 0, 0, 2, 2});
   testPoolLayer({10, 8, 16, 16, 8, 8, 2, 2, 0, 0, 2, 2});
@@ -202,6 +203,49 @@ TEST(MKLDNNLayer, PoolLayer) {
   testPoolLayer({2, 8, 56, 56, 29, 29, 3, 3, 1, 1, 2, 2});
 }
 
+struct testActDesc {
+  int bs, ch;
+  int ih, iw;
+};
+
+static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) {
+  cfg.biasSize = 0;
+  cfg.layerConfig.set_type("addto");
+  cfg.layerConfig.set_size(pm.ch * pm.ih * pm.iw);
+  cfg.inputDefs.push_back(
+      {INPUT_DATA,
+       "layer_0",
+       /* size of input layer= */ size_t(pm.ch * pm.ih * pm.iw),
+       0});
+  cfg.layerConfig.add_inputs();
+}
+
+void testActivation(std::string& type, const testActDesc& pm) {
+  const std::string compareTypes[] = {type, type.erase(0, 7)};
+  TestConfig cfg;
+  getAddtoConfig(cfg, pm);
+
+  TestConfig ref = cfg;
+  cfg.layerConfig.set_active_type(compareTypes[0]);
+  ref.layerConfig.set_active_type(compareTypes[1]);
+  MKLDNNTester tester;
+  for (auto bs : {pm.bs, 1}) {
+    tester.run(cfg, ref, bs, pm.ih, pm.iw);
+  }
+}
+
+TEST(MKLDNNActivation, Activations) {
+  auto types = MKLDNNActivation::getAllRegisteredTypes();
+  // TODO(TJ): mkldnn_softmax is not implemented; paddle has no elu activation
+  std::set<std::string> excluded{"mkldnn_softmax", "mkldnn_elu"};
+  for (auto type : types) {
+    if (excluded.count(type)) {
+      continue;
+    }
+    testActivation(type, {16, 64, 32, 32});
+  }
+}
+
 // TODO(TJ): add branch test
 
 int main(int argc, char** argv) {
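Note: testActivation above pairs each MKLDNN type with its paddle reference by copying the full name first and then stripping the 7-character "mkldnn_" prefix with erase(0, 7); braced-init elements are evaluated left to right, so compareTypes[0] still sees the unmodified name. A standalone check of that trick (plain C++, not part of the patch):

// The type-pairing trick from testActivation, in isolation.
#include <cassert>
#include <string>

int main() {
  std::string type = "mkldnn_relu";
  // element 0 copies type before erase(0, 7) mutates it for element 1
  const std::string compareTypes[] = {type, type.erase(0, 7)};
  assert(compareTypes[0] == "mkldnn_relu");  // DNN config active_type
  assert(compareTypes[1] == "relu");         // reference config active_type
  return 0;
}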