diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc
index ffa5c26da3b8a28bc01598d8607ee7ad241e1d30..8ada158ff3eaef9085b6356bcfa9769f4d0c1f1c 100644
--- a/paddle/operators/activation_op.cc
+++ b/paddle/operators/activation_op.cc
@@ -46,7 +46,7 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Sigmoid operator");
     AddOutput("Y", "Output of Sigmoid operator");
-    AddComment("Sigmoid activation operator");
+    AddComment("Sigmoid activation operator, sigmoid = 1 / (1 + exp(-x))");
   }
 };
 
@@ -56,7 +56,7 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Exp operator");
     AddOutput("Y", "Output of Exp operator");
-    AddComment("Exp activation operator");
+    AddComment("Exp activation operator, exp(x) = e^x");
   }
 };
 
@@ -66,7 +66,129 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Relu operator");
     AddOutput("Y", "Output of Relu operator");
-    AddComment("Relu activation operator");
+    AddComment("Relu activation operator, relu(x) = max(x, 0)");
+  }
+};
+
+class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  TanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y, no attributes
+    AddInput("X", "Input of Tanh operator");
+    AddOutput("Y", "Output of Tanh operator");
+    AddComment(
+        "Tanh activation operator, tanh = (exp(x) - exp(-x)) / (exp(x) + "
+        "exp(-x))");
+  }
+};
+
+class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SqrtOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y, no attributes
+    AddInput("X", "Input of Sqrt operator");
+    AddOutput("Y", "Output of Sqrt operator");
+    AddComment("Sqrt activation operator, sqrt(x) = x^(1/2)");
+  }
+};
+
+class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  AbsOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y, no attributes
+    AddInput("X", "Input of Abs operator");
+    AddOutput("Y", "Output of Abs operator");
+    AddComment("Abs activation operator, abs(x) = |x|");
+  }
+};
+
+class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ReciprocalOpMaker(framework::OpProto *proto,
+                    framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y, no attributes
+    AddInput("X", "Input of Reciprocal operator");
+    AddOutput("Y", "Output of Reciprocal operator");
+    AddComment("Reciprocal activation operator, reciprocal(x) = 1 / x");
+  }
+};
+
+class LogOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  LogOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y, no attributes
+    AddInput("X", "Input of Log operator");
+    AddOutput("Y", "Output of Log operator");
+    AddComment("Log activation operator, log(x) = natural logarithm of x");
+  }
+};
+
+class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SquareOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y, no attributes
+    AddInput("X", "Input of Square operator");
+    AddOutput("Y", "Output of Square operator");
+    AddComment("Square activation operator, square(x) = x^2");
+  }
+};
+
+template <typename AttrType>
+class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  BReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y; t_min, t_max
+    AddInput("X", "Input of BRelu operator");
+    AddOutput("Y", "Output of BRelu operator");
+    AddComment("BRelu activation operator, brelu = min(max(x, t_min), t_max)");
+    AddAttr<AttrType>("t_min", "The min marginal value of BRelu")
+        .SetDefault(static_cast<AttrType>(0));  // lower clamp bound
+    AddAttr<AttrType>("t_max", "The max marginal value of BRelu")
+        .SetDefault(static_cast<AttrType>(24));  // upper clamp bound
+  }
+};
+
+template <typename AttrType>
+class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SoftReluOpMaker(framework::OpProto *proto,
+                  framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y; threshold
+    AddInput("X", "Input of SoftRelu operator");
+    AddOutput("Y", "Output of SoftRelu operator");
+    AddComment(
+        "SoftRelu activation operator, soft_relu = log(1 + exp(max(min(x, "
+        "threshold), -threshold)))");
+    AddAttr<AttrType>("threshold", "The threshold value of SoftRelu")
+        .SetDefault(static_cast<AttrType>(40));  // input clipped to +/- this
+  }
+};
+
+template <typename AttrType>
+class PowOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  PowOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y; factor
+    AddInput("X", "Input of Pow operator");
+    AddOutput("Y", "Output of Pow operator");
+    AddComment("Pow activation operator, pow(x, factor) = x^factor");
+    AddAttr<AttrType>("factor", "The exponential factor of Pow")
+        .SetDefault(static_cast<AttrType>(1));  // default exponent: y = x
+  }
+};
+
+template <typename AttrType>
+class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  STanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {  // X -> Y; scale_a, scale_b
+    AddInput("X", "Input of STanh operator");
+    AddOutput("Y", "Output of STanh operator");
+    AddComment("STanh activation operator, stanh = b * tanh(a * x)");
+    AddAttr<AttrType>("scale_a", "The scale parameter of a for the input")
+        .SetDefault(static_cast<AttrType>(2.0 / 3.0));  // 2 / 3 would be 0
+    AddAttr<AttrType>("scale_b", "The scale parameter of b for the output")
+        .SetDefault(static_cast<AttrType>(1.7159));
   }
 };
 
@@ -78,10 +200,10 @@ REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
             ops::ActivationOpGrad);
 REGISTER_OP_CPU_KERNEL(sigmoid,
                        ops::ActivationKernel<paddle::platform::CPUPlace, float,
-                                             ops::SigmoidFunctor>);
+                                             ops::SigmoidFunctor<float>>);
 REGISTER_OP_CPU_KERNEL(
     sigmoid_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
-                                            ops::SigmoidGradFunctor>);
+                                            ops::SigmoidGradFunctor<float>>);
 
 REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad,
             ops::ActivationOpGrad);
@@ -100,3 +222,85 @@ REGISTER_OP_CPU_KERNEL(relu,
 REGISTER_OP_CPU_KERNEL(
     relu_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
                                          ops::ReluGradFunctor<float>>);
+
+REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad,
+            ops::ActivationOpGrad);  // tanh..square: functor-based kernels
+REGISTER_OP_CPU_KERNEL(
+    tanh,
+    ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::TanhFunctor>);
+REGISTER_OP_CPU_KERNEL(
+    tanh_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
+                                         ops::TanhGradFunctor<float>>);
+
+REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    sqrt,
+    ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::SqrtFunctor>);
+REGISTER_OP_CPU_KERNEL(
+    sqrt_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
+                                         ops::SqrtGradFunctor<float>>);
+
+REGISTER_OP(abs, ops::ActivationOp, ops::AbsOpMaker, abs_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    abs,
+    ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::AbsFunctor>);
+REGISTER_OP_CPU_KERNEL(abs_grad,
+                       ops::ActivationGradKernel<paddle::platform::CPUPlace,
+                                                 float, ops::AbsGradFunctor>);
+
+REGISTER_OP(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker,
+            reciprocal_grad, ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(reciprocal,
+                       ops::ActivationKernel<paddle::platform::CPUPlace, float,
+                                             ops::ReciprocalFunctor<float>>);
+REGISTER_OP_CPU_KERNEL(
+    reciprocal_grad,
+    ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
+                              ops::ReciprocalGradFunctor<float>>);
+
+REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    log,
+    ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::LogFunctor>);
+REGISTER_OP_CPU_KERNEL(
+    log_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
+                                        ops::LogGradFunctor<float>>);
+
+REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(square,
+                       ops::ActivationKernel<paddle::platform::CPUPlace, float,
+                                             ops::SquareFunctor>);
+REGISTER_OP_CPU_KERNEL(
+    square_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
+                                           ops::SquareGradFunctor<float>>);
+
+REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker<float>, brelu_grad,
+            ops::ActivationOpGrad);  // brelu..stanh: attribute-reading kernels
+REGISTER_OP_CPU_KERNEL(brelu,
+                       ops::BReluKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(brelu_grad,
+                       ops::BReluGradKernel<paddle::platform::CPUPlace, float>);
+
+REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker<float>,
+            soft_relu_grad, ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(soft_relu,
+                       ops::SoftReluKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::CPUPlace, float>);
+
+REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(pow, ops::PowKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(pow_grad,
+                       ops::PowGradKernel<paddle::platform::CPUPlace, float>);
+
+REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(stanh,
+                       ops::STanhKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(stanh_grad,
+                       ops::STanhGradKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/activation_op.cu b/paddle/operators/activation_op.cu
index 3b2c147f466c391ef84547ef65353c06861ef68c..112b33d22535f2fb6bada6e3edca3a15222eefd8 100644
--- a/paddle/operators/activation_op.cu
+++ b/paddle/operators/activation_op.cu
@@ -36,3 +36,68 @@ REGISTER_OP_GPU_KERNEL(relu,
 REGISTER_OP_GPU_KERNEL(
     relu_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
                                          ops::ReluGradFunctor<float>>);
+
+// The ops and their proto makers are registered once in activation_op.cc;
+// this file only adds the GPU kernels. Functor spellings mirror the CPU
+// registrations (NOTE(review): confirm against activation_op.h).
+REGISTER_OP_GPU_KERNEL(
+    tanh,
+    ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::TanhFunctor>);
+REGISTER_OP_GPU_KERNEL(
+    tanh_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
+                                         ops::TanhGradFunctor<float>>);
+
+REGISTER_OP_GPU_KERNEL(
+    sqrt,
+    ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::SqrtFunctor>);
+REGISTER_OP_GPU_KERNEL(
+    sqrt_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
+                                         ops::SqrtGradFunctor<float>>);
+
+REGISTER_OP_GPU_KERNEL(
+    abs,
+    ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::AbsFunctor>);
+REGISTER_OP_GPU_KERNEL(abs_grad,
+                       ops::ActivationGradKernel<paddle::platform::GPUPlace,
+                                                 float, ops::AbsGradFunctor>);
+
+REGISTER_OP_GPU_KERNEL(reciprocal,
+                       ops::ActivationKernel<paddle::platform::GPUPlace, float,
+                                             ops::ReciprocalFunctor<float>>);
+REGISTER_OP_GPU_KERNEL(
+    reciprocal_grad,
+    ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
+                              ops::ReciprocalGradFunctor<float>>);
+
+REGISTER_OP_GPU_KERNEL(
+    log,
+    ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::LogFunctor>);
+REGISTER_OP_GPU_KERNEL(
+    log_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
+                                        ops::LogGradFunctor<float>>);
+
+REGISTER_OP_GPU_KERNEL(square,
+                       ops::ActivationKernel<paddle::platform::GPUPlace, float,
+                                             ops::SquareFunctor>);
+REGISTER_OP_GPU_KERNEL(
+    square_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
+                                           ops::SquareGradFunctor<float>>);
+
+REGISTER_OP_GPU_KERNEL(brelu,
+                       ops::BReluKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(brelu_grad,
+                       ops::BReluGradKernel<paddle::platform::GPUPlace, float>);
+
+REGISTER_OP_GPU_KERNEL(soft_relu,
+                       ops::SoftReluKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::GPUPlace, float>);
+
+REGISTER_OP_GPU_KERNEL(pow, ops::PowKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(pow_grad,
+                       ops::PowGradKernel<paddle::platform::GPUPlace, float>);
+
+REGISTER_OP_GPU_KERNEL(stanh,
+                       ops::STanhKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(stanh_grad,
+                       ops::STanhGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h
index 9bf340f2ed46a8e7b64a15ea23a26feb68460c63..15f8afb4ba45cc989fe7576b82b8bf853b1df7de 100644
--- a/paddle/operators/activation_op.h
+++ b/paddle/operators/activation_op.h
@@ -55,19 +55,20 @@ class ActivationGradKernel : public framework::OpKernel {
   }
 };
 
-// sigmoid = 1 / (1 + exp(-x)
+// sigmoid(x) = 1 / (1 + exp(-x))
 template <typename T>
 struct SigmoidFunctor {
   template <typename Device, typename X, typename Y>
   void operator()(Device d, X x, Y y) {
-    y.device(d) = 1. / (1. + (-x).exp());
+    y.device(d) = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
   }
 };
 
+template <typename T>
 struct SigmoidGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
-    dx.device(d) = dy * y * (1. - y);
+    dx.device(d) = dy * y * (static_cast<T>(1) - y);  // sigmoid' = y * (1 - y)
   }
 };
 
@@ -103,7 +104,7 @@ struct ReluGradFunctor {
   }
 };
 
-// tanh = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
+// tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
 struct TanhFunctor {
   template <typename Device, typename X, typename Y>
   void operator()(Device d, X x, Y y) {
@@ -115,7 +116,7 @@ template <typename T>
 struct TanhGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
-    dx.device(d) = dy * (T(1) - y * y);
+    dx.device(d) = dy * (static_cast<T>(1) - y * y);
   }
 };
 
@@ -131,7 +132,7 @@ template <typename T>
 struct SqrtGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
-    const T y_conj = Eigen::numext::conj(y);
+    const Y y_conj = Eigen::numext::conj(y);
     dx.device(d) = static_cast<T>(0.5) * dy / y_conj;
   }
 };
@@ -144,19 +145,27 @@ struct AbsFunctor {
   }
 };
 
+struct AbsGradFunctor {
+  template <typename Device, typename X, typename Y, typename dY, typename dX>
+  void operator()(Device d, X x, Y y, dY dy, dX dx) {
+    dx.device(d) = dy * x.sign();  // d|x|/dx = sign(x)
+  }
+};
+
 // reciprocal(x) = 1 / x
 template <typename T>
 struct ReciprocalFunctor {
   template <typename Device, typename X, typename Y>
   void operator()(Device d, X x, Y y) {
-    y.device(d) = 1. / x;
+    y.device(d) = static_cast<T>(1) / x;
   }
 };
 
+template <typename T>
 struct ReciprocalGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
-    dx.device(d) = dy * (-1.0) * y * y;
+    dx.device(d) = dy * static_cast<T>(-1) * y * y;  // d(1/x)/dx = -y^2
   }
 };
 
@@ -168,10 +177,11 @@ struct LogFunctor {
   }
 };
 
+template <typename T>
 struct LogGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
-    dx.device(d) = dy * (1. / x);
+    dx.device(d) = dy * (static_cast<T>(1) / x);  // d log(x)/dx = 1 / x
   }
 };
 
@@ -181,12 +191,161 @@ struct SquareFunctor {
   void operator()(Device d, X x, Y y) {
     y.device(d) = x.square();
   }
-}
+};
 
+template <typename T>
 struct SquareGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
-    dx.device(d) = dy * 2 * x;
+    dx.device(d) = dy * static_cast<T>(2) * x;  // d(x^2)/dx = 2x
+  }
+};
+
+// Element-wise clamp of X into [t_min, t_max]: Y = min(max(X, t_min), t_max).
+template <typename Place, typename T, typename AttrType = T>
+class BReluKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto lower = static_cast<T>(context.Attr<AttrType>("t_min"));
+    const auto upper = static_cast<T>(context.Attr<AttrType>("t_max"));
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* out = context.Output<framework::Tensor>("Y");
+    out->mutable_data<T>(context.GetPlace());
+    auto src = framework::EigenVector<T>::Flatten(*in);
+    auto dst = framework::EigenVector<T>::Flatten(*out);
+    dst.device(context.GetEigenDevice<Place>()) =
+        src.cwiseMax(lower).cwiseMin(upper);
+  }
+};
+
+// Backward of BRelu: dX = dY where t_min < X < t_max, and 0 elsewhere.
+template <typename Place, typename T, typename AttrType = T>
+class BReluGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto lower = static_cast<T>(context.Attr<AttrType>("t_min"));
+    const auto upper = static_cast<T>(context.Attr<AttrType>("t_max"));
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* dout = context.Input<framework::Tensor>(framework::GradVarName("Y"));
+    auto* din = context.Output<framework::Tensor>(framework::GradVarName("X"));
+    din->mutable_data<T>(context.GetPlace());
+    auto x = framework::EigenVector<T>::Flatten(*in);
+    auto dy = framework::EigenVector<T>::Flatten(*dout);
+    auto dx = framework::EigenVector<T>::Flatten(*din);
+    // The product of the two comparisons is the mask of un-clamped elements.
+    auto inside = ((x > lower) * (x < upper)).template cast<T>();
+    dx.device(context.GetEigenDevice<Place>()) = dy * inside;
+  }
+};
+
+// Y = log(1 + exp(clip(X, -threshold, threshold))), element-wise.
+template <typename Place, typename T, typename AttrType = T>
+class SoftReluKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto limit = static_cast<T>(context.Attr<AttrType>("threshold"));
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* out = context.Output<framework::Tensor>("Y");
+    out->mutable_data<T>(context.GetPlace());
+    auto src = framework::EigenVector<T>::Flatten(*in);
+    auto dst = framework::EigenVector<T>::Flatten(*out);
+    auto clipped = src.cwiseMax(-limit).cwiseMin(limit).eval();
+    dst.device(context.GetEigenDevice<Place>()) =
+        (static_cast<T>(1) + clipped.exp()).log();
+  }
+};
+
+// dX = dY * (1 - exp(-Y)) where |X| < threshold, and 0 elsewhere.
+template <typename Place, typename T, typename AttrType = T>
+class SoftReluGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto limit = static_cast<T>(context.Attr<AttrType>("threshold"));
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* fwd = context.Input<framework::Tensor>("Y");
+    auto* dout = context.Input<framework::Tensor>(framework::GradVarName("Y"));
+    auto* din = context.Output<framework::Tensor>(framework::GradVarName("X"));
+    din->mutable_data<T>(context.GetPlace());
+    auto x = framework::EigenVector<T>::Flatten(*in);
+    auto y = framework::EigenVector<T>::Flatten(*fwd);
+    auto dy = framework::EigenVector<T>::Flatten(*dout);
+    auto dx = framework::EigenVector<T>::Flatten(*din);
+    auto mask = ((x > -limit) * (x < limit)).template cast<T>().eval();
+    dx.device(context.GetEigenDevice<Place>()) =
+        dy * (static_cast<T>(1) - (-y).exp()) * mask;
+  }
+};
+
+// Y = X^factor, element-wise.
+template <typename Place, typename T, typename AttrType = T>
+class PowKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto exponent = static_cast<T>(context.Attr<AttrType>("factor"));
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* out = context.Output<framework::Tensor>("Y");
+    out->mutable_data<T>(context.GetPlace());
+
+    auto src = framework::EigenVector<T>::Flatten(*in);
+    auto dst = framework::EigenVector<T>::Flatten(*out);
+    dst.device(context.GetEigenDevice<Place>()) = src.pow(exponent);
+  }
+};
+
+// Backward of Pow: dX = dY * factor * X^(factor - 1).
+template <typename Place, typename T, typename AttrType = T>
+class PowGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto exponent = static_cast<T>(context.Attr<AttrType>("factor"));
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* dout = context.Input<framework::Tensor>(framework::GradVarName("Y"));
+    auto* din = context.Output<framework::Tensor>(framework::GradVarName("X"));
+    din->mutable_data<T>(context.GetPlace());
+
+    auto x = framework::EigenVector<T>::Flatten(*in);
+    auto dy = framework::EigenVector<T>::Flatten(*dout);
+    auto dx = framework::EigenVector<T>::Flatten(*din);
+    dx.device(context.GetEigenDevice<Place>()) =
+        dy * exponent * x.pow(exponent - static_cast<T>(1));
+  }
+};
+
+// Y = scale_b * tanh(scale_a * X), element-wise.
+template <typename Place, typename T, typename AttrType = T>
+class STanhKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const auto a = static_cast<T>(context.Attr<AttrType>("scale_a"));
+    const auto b = static_cast<T>(context.Attr<AttrType>("scale_b"));
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* out = context.Output<framework::Tensor>("Y");
+    out->mutable_data<T>(context.GetPlace());
+
+    auto src = framework::EigenVector<T>::Flatten(*in);
+    auto dst = framework::EigenVector<T>::Flatten(*out);
+    dst.device(context.GetEigenDevice<Place>()) = b * (a * src).tanh();
+  }
+};
+
+// Backward of STanh: dX = dY * a * b * (1 - tanh(a * X)^2).
+template <typename Place, typename T, typename AttrType = T>
+class STanhGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* X = context.Input<framework::Tensor>("X");
+    auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
+    auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto scale_a = static_cast<T>(context.Attr<AttrType>("scale_a"));
+    auto scale_b = static_cast<T>(context.Attr<AttrType>("scale_b"));
+    dX->mutable_data<T>(context.GetPlace());
+    auto dy = framework::EigenVector<T>::Flatten(*dY);
+    auto x = framework::EigenVector<T>::Flatten(*X);
+    auto dx = framework::EigenVector<T>::Flatten(*dX);
+    auto place = context.GetEigenDevice<Place>();
+    // Build tanh(a * x) once and square it instead of evaluating it twice.
+    auto temp = (scale_a * x).tanh().square();
+    dx.device(place) = dy * scale_a * scale_b * (static_cast<T>(1) - temp);
   }
 };
 
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index bd964c5d0797f03a173e6d869603c3a0a2616af0..28195b1b0acbb77e051522e27000b72a13bd649e 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -55,8 +55,6 @@ USE_OP(squared_l2_distance);
 USE_OP(sum);
 USE_OP(reshape);
 USE_OP(sigmoid);
-USE_OP(exp);
-USE_OP(relu);
 
 namespace paddle {
 namespace framework {
diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py
index 4fec4c9109bf247abb2068177583acb47a8ebd97..899d3ae991e5f18a26703ed1e92dacd668fc0271 100644
--- a/python/paddle/v2/framework/tests/op_test.py
+++ b/python/paddle/v2/framework/tests/op_test.py
@@ -196,7 +196,7 @@ class OpTest(unittest.TestCase):
                 self.assertTrue(
                     np.allclose(
                         actual, expect, atol=1e-05),
-                    "output name: " + out_name + "has diff")
+                    "output name: " + out_name + " has diff")
 
     def check_output(self):
         places = [core.CPUPlace()]
diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/framework/tests/test_activation_op.py
index 23ff58439646f64b80dacf6fb5d3fa3558ca0435..7cd39dfe9163a68defa00d85c25761d99e48e9bb 100644
--- a/python/paddle/v2/framework/tests/test_activation_op.py
+++ b/python/paddle/v2/framework/tests/test_activation_op.py
@@ -21,7 +21,9 @@ class TestExp(OpTest):
 class TestRelu(OpTest):
     def setUp(self):
         self.op_type = "relu"
-        self.inputs = {'X': np.random.uniform(-1, 1, [4, 4]).astype("float32")}
+        x = np.random.uniform(-1, 1, [11, 17]).astype("float32")
+        x = np.sign(x) * np.exp(np.abs(x))
+        self.inputs = {'X': x}
         self.outputs = {'Y': np.maximum(self.inputs['X'], 0)}
 
     def test_check_output(self):
@@ -42,6 +44,167 @@ class TestSigmoid(OpTest):
     def test_check_output(self):
         self.check_output()
 
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.008)
+
+
+class TestTanh(OpTest):
+    """tanh op: output checked against np.tanh; gradient checked for X."""
+    def setUp(self):
+        x = np.random.uniform(0.1, 1, [11, 17]).astype("float32")
+        self.op_type = "tanh"
+        self.inputs = {'X': x}
+        self.outputs = {'Y': np.tanh(x)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.007)
+
+
+class TestSqrt(OpTest):
+    """sqrt op: output checked against np.sqrt; gradient checked for X."""
+    def setUp(self):
+        x = np.random.uniform(0.1, 1, [11, 17]).astype("float32")
+        self.op_type = "sqrt"
+        self.inputs = {'X': x}
+        self.outputs = {'Y': np.sqrt(x)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.007)
+
+
+class TestAbs(OpTest):
+    def setUp(self):
+        raw = np.random.uniform(-1, 1, [11, 17]).astype("float32")
+        x = np.sign(raw) * np.exp(np.abs(raw))  # |x| >= 1 unless raw == 0
+        self.op_type = "abs"
+        self.inputs = {'X': x}
+        self.outputs = {'Y': np.abs(x)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.007)
+
+
+class TestReciprocal(OpTest):
+    def setUp(self):
+        self.op_type = "reciprocal"  # X is drawn from [1, 2), so never 0
+        self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")}
+        self.outputs = {'Y': np.reciprocal(self.inputs['X'])}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.01)
+
+
+class TestLog(OpTest):
+    """log op: output checked against np.log; gradient checked for X."""
+    def setUp(self):
+        x = np.random.uniform(0.1, 1, [11, 17]).astype("float32")
+        self.op_type = "log"
+        self.inputs = {'X': x}
+        self.outputs = {'Y': np.log(x)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.007)
+
+
+class TestSquare(OpTest):
+    """square op: output checked against np.square; gradient checked for X."""
+    def setUp(self):
+        x = np.random.uniform(0.1, 1, [11, 17]).astype("float32")
+        self.op_type = "square"
+        self.inputs = {'X': x}
+        self.outputs = {'Y': np.square(x)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.007)
+
+
+class TestBRelu(OpTest):
+    """brelu op: expected output is X clamped to [t_min, t_max]."""
+    def setUp(self):
+        self.op_type = "brelu"
+        x = np.random.uniform(-1, 1, [4, 4]).astype("float32")
+        x = 2 * np.sign(x) * np.exp(np.abs(x))
+        t_min, t_max = 0, 4
+        # NOTE(review): move samples off the t_max kink so the numeric gradient
+        # is well defined; 0.005 assumes a smaller finite-difference step.
+        x[np.abs(x - t_max) < 0.005] = t_max + 0.02
+        self.inputs = {'X': x}
+        self.attrs = {'t_min': t_min, 't_max': t_max}
+        self.outputs = {'Y': np.clip(x, t_min, t_max)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.02)
+
+
+class TestSoftRelu(OpTest):
+    def setUp(self):
+        self.op_type = "soft_relu"
+        x = np.random.uniform(-1, 1, [4, 4]).astype("float32")
+        x = 2 * np.sign(x) * np.exp(np.abs(x))
+        threshold = 4
+        # Keep samples off the +/-threshold kinks (assumes grad step < 0.005).
+        x[np.abs(np.abs(x) - threshold) < 0.005] *= 1.01
+        self.inputs = {'X': x}
+        self.attrs = {'threshold': threshold}
+        t = np.clip(x, -threshold, threshold)
+        self.outputs = {'Y': np.log(np.exp(t) + 1)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.02)
+
+class TestPow(OpTest):
+    def setUp(self):
+        self.op_type = "pow"
+        self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")}
+        self.attrs = {'factor': 3}  # same exponent as the expected Y below
+        self.outputs = {'Y': np.power(self.inputs['X'], 3)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.02)
+
+
+class TestSTanh(OpTest):
+    """stanh op: expected output is scale_b * tanh(scale_a * X)."""
+    def setUp(self):
+        scale_a = 2.0 / 3.0
+        scale_b = 1.7159
+        x = np.random.uniform(0.1, 1, [11, 17]).astype("float32")
+        self.op_type = "stanh"
+        self.inputs = {'X': x}
+        self.attrs = {'scale_a': scale_a, 'scale_b': scale_b}
+        self.outputs = {'Y': scale_b * np.tanh(x * scale_a)}
+
+    def test_check_output(self):
+        self.check_output()
+
     def test_check_grad(self):
         self.check_grad(['X'], 'Y', max_relative_error=0.007)