From be26b71b9eae07781838d422f4d10d1354f88286 Mon Sep 17 00:00:00 2001
From: Qiao Longfei <qiaolongfei@baidu.com>
Date: Tue, 22 May 2018 09:40:17 +0800
Subject: [PATCH] Add cpp trainer lib and demo (#10681)

add cpp trainer lib and demo
---
 paddle/fluid/train/demo/CMakeLists.txt  |  66 +++++++++++++++
 paddle/fluid/train/demo/README.md       |  66 +++++++++++++++
 paddle/fluid/train/demo/demo_network.py |  47 +++++++++++
 paddle/fluid/train/demo/demo_trainer.cc | 103 ++++++++++++++++++++++++
 4 files changed, 282 insertions(+)
 create mode 100644 paddle/fluid/train/demo/CMakeLists.txt
 create mode 100644 paddle/fluid/train/demo/README.md
 create mode 100644 paddle/fluid/train/demo/demo_network.py
 create mode 100644 paddle/fluid/train/demo/demo_trainer.cc

diff --git a/paddle/fluid/train/demo/CMakeLists.txt b/paddle/fluid/train/demo/CMakeLists.txt
new file mode 100644
index 000000000..78d6e5ff5
--- /dev/null
+++ b/paddle/fluid/train/demo/CMakeLists.txt
@@ -0,0 +1,66 @@
+cmake_minimum_required(VERSION 3.0)
+
+project(cpp_train_demo CXX C)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+
+if(NOT DEFINED PADDLE_LIB)
+  message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/paddle/lib/dir")
+endif()
+
+option(WITH_MKLDNN     "Compile PaddlePaddle with MKLDNN"                                   OFF)
+option(WITH_MKL        "Compile PaddlePaddle with MKL support, default use openblas."       OFF)
+
+include_directories("${PADDLE_LIB}")
+include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
+include_directories("${PADDLE_LIB}/third_party/install/glog/include")
+include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
+include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
+include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
+include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
+
+include_directories("${PADDLE_LIB}/third_party/boost")
+include_directories("${PADDLE_LIB}/third_party/eigen3")
+
+link_directories("${PADDLE_LIB}/third_party/install/snappy/lib")
+link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
+link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
+link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
+link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
+link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
+
+add_executable(demo_trainer demo_trainer.cc)
+
+if(WITH_MKLDNN)
+  include_directories("${PADDLE_LIB}/third_party/install/mkldnn/include")
+  set(MKLDNN_LIB ${PADDLE_LIB}/third_party/install/mkldnn/lib/libmkldnn.so.0)
+endif()
+
+if(WITH_MKL)
+  include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
+  set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so)
+else()
+  if(APPLE)
+    set(MATH_LIB cblas)
+  else(APPLE)
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.a)
+  endif(APPLE)
+endif()
+
+if(APPLE)
+  set(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load -framework CoreFoundation -framework Security")
+else(APPLE)
+  set(ARCHIVE_START "-Wl,--whole-archive")
+  set(ARCHIVE_END "-Wl,--no-whole-archive")
+  set(EXTERNAL_LIB "-lrt -ldl -lpthread")
+endif(APPLE)
+
+target_link_libraries(demo_trainer
+        ${MACOS_LD_FLAGS}
+        ${ARCHIVE_START}
+        ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.a
+        ${ARCHIVE_END}
+        ${MATH_LIB}
+        ${MKLDNN_LIB}
+        glog gflags protobuf snappystream snappy z
+        ${EXTERNAL_LIB})
diff --git a/paddle/fluid/train/demo/README.md b/paddle/fluid/train/demo/README.md
new file mode 100644
index 000000000..fd80a77b0
--- /dev/null
+++ b/paddle/fluid/train/demo/README.md
@@ -0,0 +1,66 @@
+
+### step 1. build paddle lib
+
+```
+
+# WITH_MKL=ON|OFF
+# WITH_MKLDNN=ON|OFF
+
+PADDLE_LIB=/paddle/lib/dir
+cmake .. -DCMAKE_INSTALL_PREFIX=$PADDLE_LIB \
+         -DCMAKE_BUILD_TYPE=Release \
+         -DWITH_FLUID_ONLY=ON \
+         -DWITH_GPU=OFF \
+         -DWITH_STYLE_CHECK=OFF \
+         -DWITH_MKL=OFF \
+         -DWITH_MKLDNN=OFF
+make -j8
+make -j8 inference_lib_dist
+```
+
+### step 2. generate program desc
+```
+# please install paddle before run this scripe
+pip install --upgrade paddlepaddle-*.whl
+python demo_network.py
+```
+
+This will generate two program desc files:
+  - startup_program: used to init all parameters
+  - main_program: main logic of the network
+
+### step 3. build demo_trainer and run it.
+
+
+```
+# Make a build dir at the same dir of this README.md document.
+# The demo dir can be put anywhere.
+mkdir build
+cd build
+
+# WITH_MKL=ON|OFF
+# WITH_MKLDNN=ON|OFF
+PADDLE_LIB=/paddle/lib/dir
+
+# PADDLE_LIB is the same with CMAKE_INSTALL_PREFIX when building the lib
+cmake .. -DPADDLE_LIB=$PADDLE_LIB \
+         -DWITH_MKLDNN=OFF \
+         -DWITH_MKL=OFF
+make
+
+# copy startup_program and main_program to this dir
+cp ../startup_program .
+cp ../main_program .
+
+# run demo cpp trainer
+./demo_trainer
+
+```
+
+The output will be:
+```
+step: 0 loss: 1069.02
+step: 1 loss: 1069.02
+step: 2 loss: 1069.02
+....
+```
diff --git a/paddle/fluid/train/demo/demo_network.py b/paddle/fluid/train/demo/demo_network.py
new file mode 100644
index 000000000..41e98c6a2
--- /dev/null
+++ b/paddle/fluid/train/demo/demo_network.py
@@ -0,0 +1,47 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+import paddle.fluid.framework as framework
+
+
+def train_network(with_optimize):
+    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+    y_predict = fluid.layers.fc(input=x, size=1, act=None)
+
+    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+    avg_cost = fluid.layers.mean(cost)
+
+    if with_optimize:
+        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.00001)
+        sgd_optimizer.minimize(avg_cost)
+    else:
+        fluid.backward.append_backward(avg_cost)
+
+
+def save_program_desc(network_func):
+    startup_program = framework.Program()
+    train_program = framework.Program()
+
+    with framework.program_guard(train_program, startup_program):
+        network_func(with_optimize=False)
+
+    with open("startup_program", "w") as f:
+        f.write(startup_program.desc.serialize_to_string())
+    with open("main_program", "w") as f:
+        f.write(train_program.desc.serialize_to_string())
+
+
+save_program_desc(train_network)
diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc
new file mode 100644
index 000000000..813d83868
--- /dev/null
+++ b/paddle/fluid/train/demo/demo_trainer.cc
@@ -0,0 +1,103 @@
+//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fstream>
+
+#include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/init.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/place.h"
+
+namespace paddle {
+namespace train {
+
+void ReadBinaryFile(const std::string& filename, std::string* contents) {
+  std::ifstream fin(filename, std::ios::in | std::ios::binary);
+  PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", filename);
+  fin.seekg(0, std::ios::end);
+  contents->clear();
+  contents->resize(fin.tellg());
+  fin.seekg(0, std::ios::beg);
+  fin.read(&(contents->at(0)), contents->size());
+  fin.close();
+}
+
+std::unique_ptr<paddle::framework::ProgramDesc> Load(
+    paddle::framework::Executor* executor, const std::string& model_filename) {
+  VLOG(3) << "loading model from " << model_filename;
+  std::string program_desc_str;
+  ReadBinaryFile(model_filename, &program_desc_str);
+
+  std::unique_ptr<paddle::framework::ProgramDesc> main_program(
+      new paddle::framework::ProgramDesc(program_desc_str));
+  return main_program;
+}
+
+}  // namespace train
+}  // namespace paddle
+
+int main() {
+  paddle::framework::InitDevices(false);
+
+  const auto cpu_place = paddle::platform::CPUPlace();
+
+  paddle::framework::Executor executor(cpu_place);
+  paddle::framework::Scope scope;
+  auto startup_program = paddle::train::Load(&executor, "startup_program");
+  auto train_program = paddle::train::Load(&executor, "main_program");
+
+  std::string loss_name = "";
+  for (auto op_desc : train_program->Block(0).AllOps()) {
+    if (op_desc->Type() == "mean") {
+      loss_name = op_desc->Output("Out")[0];
+      break;
+    }
+  }
+
+  PADDLE_ENFORCE_NE(loss_name, "", "loss not found");
+
+  // init all parameters
+  executor.Run(*startup_program.get(), &scope, 0);
+
+  // prepare data
+  auto x_var = scope.Var("x");
+  auto x_tensor = x_var->GetMutable<paddle::framework::LoDTensor>();
+  x_tensor->Resize({2, 13});
+
+  auto x_data = x_tensor->mutable_data<float>(cpu_place);
+  for (int i = 0; i < 2 * 13; ++i) {
+    x_data[i] = static_cast<float>(i);
+  }
+
+  auto y_var = scope.Var("y");
+  auto y_tensor = y_var->GetMutable<paddle::framework::LoDTensor>();
+  y_tensor->Resize({2, 1});
+  auto y_data = y_tensor->mutable_data<float>(cpu_place);
+  for (int i = 0; i < 2 * 1; ++i) {
+    y_data[i] = static_cast<float>(i);
+  }
+
+  auto loss_var = scope.Var(loss_name);
+
+  for (int i = 0; i < 10; ++i) {
+    executor.Run(*train_program.get(), &scope, 0, false, true);
+    std::cout << "step: " << i << " loss: "
+              << loss_var->Get<paddle::framework::LoDTensor>().data<float>()[0]
+              << std::endl;
+  }
+  return 0;
+}
-- 
GitLab