diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc
index 682cff168d4d31e0565fc987604f97a671566fbd..3bea8f3d0a31a54ef9eebe5731696b64ceed13f8 100644
--- a/paddle/framework/init.cc
+++ b/paddle/framework/init.cc
@@ -75,5 +75,10 @@ bool InitDevices(const std::vector<std::string> &devices) {
   return true;
 }
 
+void InitGLOG(const std::string &prog_name) {
+  google::InitGoogleLogging(prog_name.c_str());
+  google::InstallFailureSignalHandler();
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/init.h b/paddle/framework/init.h
index 33907f9eb00fb3469b53dcf8151557cc7a2d3791..9c84a03ded52632047841f95badbcf44bc9f48d1 100644
--- a/paddle/framework/init.h
+++ b/paddle/framework/init.h
@@ -22,6 +22,8 @@ namespace framework {
 
 void InitGflags(std::vector<std::string> &argv);
 
+void InitGLOG(const std::string &prog_name);
+
 bool InitDevices(const std::vector<std::string> &devices);
 
 }  // namespace framework
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 7b6dc09bdb5535488c8c4dbc71c9cd6a7998bd0b..92b3d7fccd1539b033684e33cb6e584b2167fa77 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -177,6 +177,9 @@ void AppendLoD(LoD *lod, const LoD &lod_length) {
       lod->empty() || lod->size() == lod_length.size(),
       "The lod_length should has the same size with the appended lod.");
   if (lod->empty()) {
+    for (size_t i = 0; i < lod_length.size(); ++i) {
+      lod->emplace_back(1, 0);  // size = 1, value = 0;
+    }
     *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
   }
   for (size_t i = 0; i < lod->size(); ++i) {
diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h
index c644e7d2966555ce45f81078c91ce419f3d20d0e..31c12e9e1f7029767300464987f4d39c3e48a3a3 100644
--- a/paddle/framework/threadpool.h
+++ b/paddle/framework/threadpool.h
@@ -29,7 +29,6 @@ namespace framework {
 class ThreadPool {
  public:
   typedef std::packaged_task<void()> Task;
-  typedef std::function<void()> Fun;
 
   /**
    * @brief   Get a instance of threadpool, the thread number will
@@ -67,7 +66,8 @@ class ThreadPool {
    * @return std::future<void>, we could wait for the task finished by
    *         f.wait().
    */
-  std::future<void> Run(const Fun& fn) {
+  template <typename Callback>
+  std::future<void> Run(Callback fn) {
     std::unique_lock<std::mutex> lock(mutex_);
     Task task(std::bind(fn));
     std::future<void> f = task.get_future();
@@ -162,10 +162,9 @@ class ThreadPool {
 // Run a function asynchronously.
 // NOTE: The function must return void. If the function need to return a value,
 // you can use lambda to capture a value pointer.
-template <typename Callback, typename... ARGS>
-std::future<void> Async(Callback callback, ARGS... args) {
-  return ThreadPool::GetInstance()->Run(
-      [&] { callback(std::forward<ARGS>(args)...); });
+template <typename Callback>
+std::future<void> Async(Callback callback) {
+  return ThreadPool::GetInstance()->Run(callback);
 };
 
 }  // namespace framework
diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h
index 0885f7c570b9b52dc51597347295734fd689da8d..88c3d1c597a853abdee7753a5110be4a1726e905 100644
--- a/paddle/operators/activation_op.h
+++ b/paddle/operators/activation_op.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/detail/safe_ref.h"
 
 namespace paddle {
 namespace operators {
@@ -26,12 +27,16 @@ class ActivationKernel
   using T = typename Functor::ELEMENT_TYPE;
 
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* X = context.Input<framework::Tensor>("X");
-    auto* Out = context.Output<framework::Tensor>("Out");
-    Out->mutable_data<T>(context.GetPlace());
-
-    auto x = framework::EigenVector<T>::Flatten(*X);
-    auto out = framework::EigenVector<T>::Flatten(*Out);
+    auto& X = detail::Ref(context.Input<framework::Tensor>("X"),
+                          "Cannot get input tensor X, variable name = %s",
+                          context.op().Input("X"));
+
+    auto& Out = detail::Ref(context.Output<framework::Tensor>("Out"),
+                            "Cannot get output tensor Out, variable name = %s",
+                            context.op().Output("Out"));
+    Out.mutable_data<T>(context.GetPlace());
+    auto x = framework::EigenVector<T>::Flatten(X);
+    auto out = framework::EigenVector<T>::Flatten(Out);
     auto* place =
         context.template device_context<DeviceContext>().eigen_device();
     Functor functor;
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 04485ce7c1ab87f8655b0e6cbaecc36b3382f647..364db62cba6acd7ac380b5017d09f22eefa24813 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -427,6 +427,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("unique_integer", UniqueIntegerGenerator);
 
   m.def("init_gflags", framework::InitGflags);
+  m.def("init_glog", framework::InitGLOG);
   m.def("init_devices", &framework::InitDevices);
 
   m.def("is_compile_gpu", IsCompileGPU);
diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py
index 225b41c5043b5792abb90bbad53cbbfce9a3156e..e1155296650cc7a30fc0041fc745a1f1d812d77e 100644
--- a/python/paddle/v2/fluid/__init__.py
+++ b/python/paddle/v2/fluid/__init__.py
@@ -27,7 +27,7 @@ __all__ = framework.__all__ + executor.__all__ + [
 ]
 
 
-def __read_gflags_from_env__():
+def __bootstrap__():
     """
     Enable reading gflags from environment variables.
 
@@ -41,6 +41,7 @@ def __read_gflags_from_env__():
         read_env_flags.append('fraction_of_gpu_memory_to_use')
     core.init_gflags([sys.argv[0]] +
                      ["--tryfromenv=" + ",".join(read_env_flags)])
+    core.init_glog(sys.argv[0])
 
     if core.is_compile_gpu():
         core.init_devices(["CPU", "GPU:0"])
@@ -48,4 +49,4 @@ def __read_gflags_from_env__():
         core.init_devices(["CPU"])
 
 
-__read_gflags_from_env__()
+__bootstrap__()
diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index d2ff6841a317aaf6903edadc9213f69ef6c41216..23fe13f9bbf3e81802ac86415472e6aa603711b1 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -1,9 +1,24 @@
 from ..registry import register_layer
-__all__ = [
-    'mean', 'mul', 'dropout', 'reshape', 'sigmoid', 'scale', 'transpose',
-    'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
-    'elementwise_sub', 'elementwise_mul', 'clip', 'abs', 'sequence_softmax'
+
+__activations__ = [
+    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round'
 ]
 
+__all__ = [
+    'mean',
+    'mul',
+    'dropout',
+    'reshape',
+    'scale',
+    'transpose',
+    'sigmoid_cross_entropy_with_logits',
+    'elementwise_add',
+    'elementwise_div',
+    'elementwise_sub',
+    'elementwise_mul',
+    'clip',
+    'sequence_softmax',
+] + __activations__
+
 for _OP in set(__all__):
     globals()[_OP] = register_layer(_OP)
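
For context, a minimal standalone sketch of the templated `Run`/`Async` pattern that replaces the `std::function<void()>`-based overload above. This is an illustration under assumptions, not Paddle's actual `ThreadPool`: a void-returning callable is wrapped in a `std::packaged_task<void()>` and the caller waits on the returned `std::future<void>`; a real pool would enqueue the task on a worker thread instead of running it inline.

```cpp
#include <future>
#include <iostream>
#include <utility>

// Sketch only: any void-returning callable is accepted via a template
// parameter instead of std::function<void()>, avoiding one type-erasure hop.
template <typename Callback>
std::future<void> Run(Callback fn) {
  std::packaged_task<void()> task(std::move(fn));
  std::future<void> f = task.get_future();
  task();  // in a real thread pool, a worker thread would invoke the task
  return f;
}

int main() {
  int counter = 0;
  // Arguments are captured by the lambda itself, which is why the old
  // (Callback, ARGS...) overload of Async() is no longer needed.
  std::future<void> f = Run([&counter] { counter += 42; });
  f.wait();
  std::cout << "counter = " << counter << "\n";  // prints 42
  return 0;
}
```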
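Likewise, a rough, assumption-based sketch of the idea behind `detail::Ref` used in the activation kernel change, not the actual contents of `paddle/operators/detail/safe_ref.h`: dereference an input/output pointer only after checking it, so a missing "X" or "Out" tensor fails with a readable message instead of a null-pointer crash. The real helper reports errors through Paddle's enforce machinery and formats the extra arguments; this sketch simply throws and ignores them.

```cpp
#include <stdexcept>
#include <string>

// Hypothetical stand-in for detail::Ref: checked pointer-to-reference helper.
template <typename T, typename... Args>
T& Ref(T* ptr, const std::string& msg, Args&&... /*extra message args*/) {
  if (ptr == nullptr) {
    throw std::runtime_error(msg);  // Paddle would use its enforce macros here
  }
  return *ptr;
}

int main() {
  int value = 7;
  int& ref = Ref(&value, "Cannot get value");  // ok: returns a reference
  ref = 8;
  // Ref<int>(nullptr, "Cannot get value");    // would throw, not segfault
  return ref == 8 ? 0 : 1;
}
```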