Commit 24181fdc authored by Yang Yu

Merge branch 'develop' of github.com:baidu/Paddle into feature/async_drop_kid

@@ -75,5 +75,10 @@ bool InitDevices(const std::vector<std::string> &devices) {
   return true;
 }
 
+void InitGLOG(const std::string &prog_name) {
+  google::InitGoogleLogging(prog_name.c_str());
+  google::InstallFailureSignalHandler();
+}
+
 }  // namespace framework
 }  // namespace paddle
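The new InitGLOG routes the framework's logging through Google glog: InitGoogleLogging tags log output with the program name, and InstallFailureSignalHandler prints a stack trace when the process dies on a fatal signal. A minimal usage sketch, not part of the diff, assuming the declaration lands in paddle/framework/init.h as the next hunk suggests:

#include <glog/logging.h>
#include "paddle/framework/init.h"  // assumed header path for the declaration above

int main(int argc, char* argv[]) {
  // Run once, early, so the failure handler is installed before any real work.
  paddle::framework::InitGLOG(argv[0]);
  LOG(INFO) << "glog initialized for " << argv[0];
  return 0;
}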
@@ -22,6 +22,8 @@ namespace framework {
 void InitGflags(std::vector<std::string> &argv);
 
+void InitGLOG(const std::string &prog_name);
+
 bool InitDevices(const std::vector<std::string> &devices);
 }  // namespace framework
...
@@ -177,6 +177,9 @@ void AppendLoD(LoD *lod, const LoD &lod_length) {
       lod->empty() || lod->size() == lod_length.size(),
       "The lod_length should has the same size with the appended lod.");
   if (lod->empty()) {
+    for (size_t i = 0; i < lod_length.size(); ++i) {
+      lod->emplace_back(1, 0);  // size = 1, value = 0;
+    }
     *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
   }
   for (size_t i = 0; i < lod->size(); ++i) {
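The added loop seeds each level of an empty LoD with a single zero offset before anything is appended; the assignment on the following line rebuilds the same {0}-per-level shape, so either way the branch guarantees every level has a starting offset. A hedged usage sketch, not part of the diff, assuming AppendLoD and LoD live in paddle::framework, that LoD is the usual vector of offset vectors, and that the truncated loop below accumulates lod_length onto the trailing offset of each level:

// Hypothetical call site, for illustration only.
paddle::framework::LoD lod;                    // starts empty: no levels yet
paddle::framework::LoD lod_length = {{2, 3}};  // one level, two sequence lengths
paddle::framework::AppendLoD(&lod, lod_length);
// The empty-LoD branch first leaves lod as {{0}}, so the accumulation that
// follows can extend from a valid trailing offset: lod == {{0, 2, 5}}.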
...
@@ -29,7 +29,6 @@ namespace framework {
 class ThreadPool {
  public:
   typedef std::packaged_task<void()> Task;
-  typedef std::function<void()> Fun;
 
   /**
    * @brief Get a instance of threadpool, the thread number will
@@ -67,7 +66,8 @@ class ThreadPool {
    * @return std::future<void>, we could wait for the task finished by
    * f.wait().
    */
-  std::future<void> Run(const Fun& fn) {
+  template <typename Callback>
+  std::future<void> Run(Callback fn) {
     std::unique_lock<std::mutex> lock(mutex_);
     Task task(std::bind(fn));
     std::future<void> f = task.get_future();
@@ -162,10 +162,9 @@ class ThreadPool {
 // Run a function asynchronously.
 // NOTE: The function must return void. If the function need to return a value,
 // you can use lambda to capture a value pointer.
-template <typename Callback, typename ARGS>
-std::future<void> Async(Callback callback, ARGS... args) {
-  return ThreadPool::GetInstance()->Run(
-      [&] { callback(std::forward(args)...); });
+template <typename Callback>
+std::future<void> Async(Callback callback) {
+  return ThreadPool::GetInstance()->Run(callback);
 };
 
 }  // namespace framework
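Run is now a template over any void() callable and the variadic Async overload is gone, so callers no longer pass arguments through Async; they capture them in the callable instead. A hedged usage sketch, not part of the diff (the header path is assumed):

#include <future>
#include "paddle/framework/threadpool.h"  // assumed location of ThreadPool/Async

void Demo() {
  int result = 0;
  int x = 41;  // captured by value so it remains valid on the worker thread
  std::future<void> f =
      paddle::framework::Async([x, &result] { result = x + 1; });
  f.wait();  // the callable must return void; results come back via captures
  // result == 42 once wait() returns
}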
...
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/detail/safe_ref.h"
 
 namespace paddle {
 namespace operators {
@@ -26,12 +27,16 @@ class ActivationKernel
   using T = typename Functor::ELEMENT_TYPE;
 
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* X = context.Input<framework::Tensor>("X");
-    auto* Out = context.Output<framework::Tensor>("Out");
-    Out->mutable_data<T>(context.GetPlace());
-
-    auto x = framework::EigenVector<T>::Flatten(*X);
-    auto out = framework::EigenVector<T>::Flatten(*Out);
+    auto& X = detail::Ref(context.Input<framework::Tensor>("X"),
+                          "Cannot get input tensor X, variable name = %s",
+                          context.op().Input("X"));
+    auto& Out = detail::Ref(context.Output<framework::Tensor>("Out"),
+                            "Cannot get output tensor Out, variable name = %s",
+                            context.op().Output("Out"));
+    Out.mutable_data<T>(context.GetPlace());
+    auto x = framework::EigenVector<T>::Flatten(X);
+    auto out = framework::EigenVector<T>::Flatten(Out);
     auto* place =
         context.template device_context<DeviceContext>().eigen_device();
     Functor functor;
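detail::Ref comes from the newly included safe_ref.h: it converts the raw pointer returned by context.Input/Output into a reference and fails with the formatted message when the pointer is null, instead of crashing on a later dereference. A hedged sketch of what such a helper typically looks like, assuming a PADDLE_ENFORCE-style check; the actual contents of paddle/operators/detail/safe_ref.h may differ:

// Illustrative sketch only, not the diff's actual safe_ref.h.
namespace paddle {
namespace operators {
namespace detail {

// Checks ptr before dereferencing; extra arguments are forwarded into the
// failure message (e.g. a format string plus the variable name).
template <typename T, typename... ARGS>
inline T& Ref(T* ptr, ARGS&&... args) {
  PADDLE_ENFORCE(ptr != nullptr, std::forward<ARGS>(args)...);
  return *ptr;
}

}  // namespace detail
}  // namespace operators
}  // namespace paddle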
...
@@ -427,6 +427,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("unique_integer", UniqueIntegerGenerator);
 
   m.def("init_gflags", framework::InitGflags);
+  m.def("init_glog", framework::InitGLOG);
   m.def("init_devices", &framework::InitDevices);
 
   m.def("is_compile_gpu", IsCompileGPU);
...
@@ -27,7 +27,7 @@ __all__ = framework.__all__ + executor.__all__ + [
 ]
 
 
-def __read_gflags_from_env__():
+def __bootstrap__():
     """
     Enable reading gflags from environment variables.
@@ -41,6 +41,7 @@ def __read_gflags_from_env__():
         read_env_flags.append('fraction_of_gpu_memory_to_use')
     core.init_gflags([sys.argv[0]] +
                      ["--tryfromenv=" + ",".join(read_env_flags)])
+    core.init_glog(sys.argv[0])
 
     if core.is_compile_gpu():
         core.init_devices(["CPU", "GPU:0"])
@@ -48,4 +49,4 @@ def __read_gflags_from_env__():
         core.init_devices(["CPU"])
 
 
-__read_gflags_from_env__()
+__bootstrap__()
...
 from ..registry import register_layer
-__all__ = [
-    'mean', 'mul', 'dropout', 'reshape', 'sigmoid', 'scale', 'transpose',
-    'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
-    'elementwise_sub', 'elementwise_mul', 'clip', 'abs', 'sequence_softmax'
-]
+__activations__ = [
+    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round'
+]
+
+__all__ = [
+    'mean',
+    'mul',
+    'dropout',
+    'reshape',
+    'scale',
+    'transpose',
+    'sigmoid_cross_entropy_with_logits',
+    'elementwise_add',
+    'elementwise_div',
+    'elementwise_sub',
+    'elementwise_mul',
+    'clip',
+    'sequence_softmax',
+] + __activations__
 
 for _OP in set(__all__):
     globals()[_OP] = register_layer(_OP)