From 36da52950d53fdcd6903913a627b5e622648f5f4 Mon Sep 17 00:00:00 2001
From: Siddharth Goyal <vi.siddharth78@gmail.com>
Date: Thu, 8 Feb 2018 13:46:33 -0800
Subject: [PATCH] Better version of PR #7985 (Modify load() for inference)
 (#8024)

* Refine load

* Address review comments: round 1

* Make API consistent with python-save/load

* Add another unit test

* Remove commented function

* Fix GPU bug

* Address review comments

* Modify wrt PR 8147

* Fix filenames for combined case

* Fix typo

* Address review comments: round 2

* Unify TestInference by keeping default param in template

* Address review comment

* Fix spacing
---
 paddle/inference/io.cc                        | 73 +++++++++++++++----
 paddle/inference/io.h                         |  8 +-
 paddle/inference/tests/book/test_helper.h     | 17 ++++-
 .../book/test_inference_recognize_digits.cc   | 42 +++++++++++
 python/paddle/v2/fluid/io.py                  | 12 ++-
 .../fluid/tests/book/test_recognize_digits.py | 44 +++++++----
 6 files changed, 159 insertions(+), 37 deletions(-)

diff --git a/paddle/inference/io.cc b/paddle/inference/io.cc
index 1ed14b69c83..784e87970f7 100644
--- a/paddle/inference/io.cc
+++ b/paddle/inference/io.cc
@@ -21,6 +21,17 @@ limitations under the License. */
 namespace paddle {
 namespace inference {
 
+void ReadBinaryFile(const std::string& filename, std::string& contents) {
+  VLOG(3) << "loading model from " << filename;
+  std::ifstream inputfs(filename, std::ios::in | std::ios::binary);
+  inputfs.seekg(0, std::ios::end);
+  contents.clear();
+  contents.resize(inputfs.tellg());
+  inputfs.seekg(0, std::ios::beg);
+  inputfs.read(&contents[0], contents.size());
+  inputfs.close();
+}
+
 bool IsParameter(const framework::VarDesc* var,
                  const framework::ProgramDesc& main_program) {
   if (var->Persistable()) {
@@ -44,12 +55,15 @@ bool IsParameter(const framework::VarDesc* var,
 
 void LoadPersistables(framework::Executor& executor,
                       framework::Scope& scope,
+                      const framework::ProgramDesc& main_program,
                       const std::string& dirname,
-                      const framework::ProgramDesc& main_program) {
+                      const std::string& param_filename) {
   const framework::BlockDesc& global_block = main_program.Block(0);
 
   framework::ProgramDesc* load_program = new framework::ProgramDesc();
   framework::BlockDesc* load_block = load_program->MutableBlock(0);
+  std::vector<std::string> paramlist;
+
   for (auto* var : global_block.AllVars()) {
     if (IsParameter(var, main_program)) {
       VLOG(3) << "parameter's name: " << var->Name();
@@ -61,15 +75,33 @@ void LoadPersistables(framework::Executor& executor,
       new_var->SetLoDLevel(var->GetLoDLevel());
       new_var->SetPersistable(true);
 
-      // append_op
-      framework::OpDesc* op = load_block->AppendOp();
-      op->SetType("load");
-      op->SetOutput("Out", {new_var->Name()});
-      op->SetAttr("file_path", {dirname + "/" + new_var->Name()});
-      op->CheckAttrs();
+      if (!param_filename.empty()) {
+        paramlist.push_back(new_var->Name());
+      } else {
+        // append_op
+        framework::OpDesc* op = load_block->AppendOp();
+        op->SetType("load");
+        op->SetOutput("Out", {new_var->Name()});
+        op->SetAttr("file_path", {dirname + "/" + new_var->Name()});
+        op->CheckAttrs();
+      }
     }
   }
+
+  if (!param_filename.empty()) {
+    // sort paramlist to have consistent ordering
+    std::sort(paramlist.begin(), paramlist.end());
+    // append just the load_combine op
+    framework::OpDesc* op = load_block->AppendOp();
+    op->SetType("load_combine");
+    op->SetOutput("Out", paramlist);
+    op->SetAttr("file_path", {param_filename});
+    op->CheckAttrs();
+  }
+
   executor.Run(*load_program, &scope, 0, true, true);
+
+  VLOG(3) << "Ran loading successfully";
   delete load_program;
 }
 
@@ -77,20 +109,29 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
                                              framework::Scope& scope,
                                              const std::string& dirname) {
   std::string model_filename = dirname + "/__model__";
-  LOG(INFO) << "loading model from " << model_filename;
-  std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary);
   std::string program_desc_str;
-  inputfs.seekg(0, std::ios::end);
-  program_desc_str.resize(inputfs.tellg());
-  inputfs.seekg(0, std::ios::beg);
-  LOG(INFO) << "program_desc_str's size: " << program_desc_str.size();
-  inputfs.read(&program_desc_str[0], program_desc_str.size());
-  inputfs.close();
+  ReadBinaryFile(model_filename, program_desc_str);
+
+  std::unique_ptr<framework::ProgramDesc> main_program(
+      new framework::ProgramDesc(program_desc_str));
+
+  LoadPersistables(executor, scope, *main_program, dirname, "");
+  return main_program;
+}
+
+std::unique_ptr<framework::ProgramDesc> Load(
+    framework::Executor& executor,
+    framework::Scope& scope,
+    const std::string& prog_filename,
+    const std::string& param_filename) {
+  std::string model_filename = prog_filename;
+  std::string program_desc_str;
+  ReadBinaryFile(model_filename, program_desc_str);
 
   std::unique_ptr<framework::ProgramDesc> main_program(
       new framework::ProgramDesc(program_desc_str));
 
-  LoadPersistables(executor, scope, dirname, *main_program);
+  LoadPersistables(executor, scope, *main_program, "", param_filename);
   return main_program;
 }
 
diff --git a/paddle/inference/io.h b/paddle/inference/io.h
index 962b6c4e20d..a7d7c499690 100644
--- a/paddle/inference/io.h
+++ b/paddle/inference/io.h
@@ -26,12 +26,18 @@ namespace inference {
 
 void LoadPersistables(framework::Executor& executor,
                       framework::Scope& scope,
+                      const framework::ProgramDesc& main_program,
                       const std::string& dirname,
-                      const framework::ProgramDesc& main_program);
+                      const std::string& param_filename);
 
 std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
                                              framework::Scope& scope,
                                              const std::string& dirname);
 
+std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
+                                             framework::Scope& scope,
+                                             const std::string& prog_filename,
+                                             const std::string& param_filename);
+
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/inference/tests/book/test_helper.h b/paddle/inference/tests/book/test_helper.h
index 32db643fca2..3e66ced94fe 100644
--- a/paddle/inference/tests/book/test_helper.h
+++ b/paddle/inference/tests/book/test_helper.h
@@ -67,17 +67,28 @@ void CheckError(paddle::framework::LoDTensor& output1,
   EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
 }
 
-template <typename Place, typename T>
+template <typename Place, typename T, bool IsCombined = false>
 void TestInference(const std::string& dirname,
                    const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
                    std::vector<paddle::framework::LoDTensor*>& cpu_fetchs) {
-  // 1. Define place, executor and scope
+  // 1. Define place, executor, scope and inference_program
   auto place = Place();
   auto executor = paddle::framework::Executor(place);
   auto* scope = new paddle::framework::Scope();
+  std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
 
   // 2. Initialize the inference_program and load all parameters from file
-  auto inference_program = paddle::inference::Load(executor, *scope, dirname);
+  if (IsCombined) {
+    // Hard-coding the names for combined params case
+    std::string prog_filename = "__model_combined__";
+    std::string param_filename = "__params_combined__";
+    inference_program = paddle::inference::Load(executor,
+                                                *scope,
+                                                dirname + "/" + prog_filename,
+                                                dirname + "/" + param_filename);
+  } else {
+    inference_program = paddle::inference::Load(executor, *scope, dirname);
+  }
 
   // 3. Get the feed_target_names and fetch_target_names
   const std::vector<std::string>& feed_target_names =
diff --git a/paddle/inference/tests/book/test_inference_recognize_digits.cc b/paddle/inference/tests/book/test_inference_recognize_digits.cc
index 48f887e6bc6..3a48db7fe08 100644
--- a/paddle/inference/tests/book/test_inference_recognize_digits.cc
+++ b/paddle/inference/tests/book/test_inference_recognize_digits.cc
@@ -59,3 +59,45 @@ TEST(inference, recognize_digits) {
   CheckError<float>(output1, output2);
 #endif
 }
+
+TEST(inference, recognize_digits_combine) {
+  if (FLAGS_dirname.empty()) {
+    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
+  }
+
+  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
+  std::string dirname = FLAGS_dirname;
+
+  // 0. Call `paddle::framework::InitDevices()` initialize all the devices
+  // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
+
+  paddle::framework::LoDTensor input;
+  // Use normilized image pixels as input data,
+  // which should be in the range [-1.0, 1.0].
+  SetupTensor<float>(
+      input, {1, 28, 28}, static_cast<float>(-1), static_cast<float>(1));
+  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
+  cpu_feeds.push_back(&input);
+
+  paddle::framework::LoDTensor output1;
+  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  cpu_fetchs1.push_back(&output1);
+
+  // Run inference on CPU
+  TestInference<paddle::platform::CPUPlace, float, true>(
+      dirname, cpu_feeds, cpu_fetchs1);
+  LOG(INFO) << output1.dims();
+
+#ifdef PADDLE_WITH_CUDA
+  paddle::framework::LoDTensor output2;
+  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+  cpu_fetchs2.push_back(&output2);
+
+  // Run inference on CUDA GPU
+  TestInference<paddle::platform::CUDAPlace, float, true>(
+      dirname, cpu_feeds, cpu_fetchs2);
+  LOG(INFO) << output2.dims();
+
+  CheckError<float>(output1, output2);
+#endif
+}
diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py
index 613dc20b6ea..0f43e46082a 100644
--- a/python/paddle/v2/fluid/io.py
+++ b/python/paddle/v2/fluid/io.py
@@ -342,7 +342,11 @@ def save_inference_model(dirname,
     prepend_feed_ops(inference_program, feeded_var_names)
     append_fetch_ops(inference_program, fetch_var_names)
 
-    model_file_name = dirname + "/__model__"
+    if save_file_name == None:
+        model_file_name = dirname + "/__model__"
+    else:
+        model_file_name = dirname + "/__model_combined__"
+
     with open(model_file_name, "wb") as f:
         f.write(inference_program.desc.serialize_to_string())
 
@@ -384,7 +388,11 @@ def load_inference_model(dirname, executor, load_file_name=None):
     if not os.path.isdir(dirname):
         raise ValueError("There is no directory named '%s'", dirname)
 
-    model_file_name = dirname + "/__model__"
+    if load_file_name == None:
+        model_file_name = dirname + "/__model__"
+    else:
+        model_file_name = dirname + "/__model_combined__"
+
     with open(model_file_name, "rb") as f:
         program_desc_str = f.read()
 
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py
index d8f0ad89cd8..6f9d85faff9 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py
@@ -78,7 +78,7 @@ def conv_net(img, label):
     return loss_net(conv_pool_2, label)
 
 
-def train(nn_type, use_cuda, parallel, save_dirname):
+def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
@@ -143,8 +143,10 @@ def train(nn_type, use_cuda, parallel, save_dirname):
                 avg_loss_val = numpy.array(avg_loss_set).mean()
                 if float(acc_val) > 0.85:  # test acc > 85%
                     if save_dirname is not None:
-                        fluid.io.save_inference_model(save_dirname, ["img"],
-                                                      [prediction], exe)
+                        fluid.io.save_inference_model(
+                            save_dirname, ["img"], [prediction],
+                            exe,
+                            save_file_name=save_param_filename)
                     return
                 else:
                     print(
@@ -156,7 +158,7 @@ def train(nn_type, use_cuda, parallel, save_dirname):
     raise AssertionError("Loss of recognize digits is too large")
 
 
-def infer(use_cuda, save_dirname=None):
+def infer(use_cuda, save_dirname=None, param_filename=None):
     if save_dirname is None:
         return
 
@@ -167,8 +169,8 @@ def infer(use_cuda, save_dirname=None):
     # the feed_target_names (the names of variables that will be feeded 
     # data using feed operators), and the fetch_targets (variables that 
     # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+    [inference_program, feed_target_names, fetch_targets
+     ] = fluid.io.load_inference_model(save_dirname, exe, param_filename)
 
     # The input's dimension of conv should be 4-D or 5-D.
     # Use normilized image pixels as input data, which should be in the range [-1.0, 1.0].
@@ -183,36 +185,45 @@ def infer(use_cuda, save_dirname=None):
     print("infer results: ", results[0])
 
 
-def main(use_cuda, parallel, nn_type):
+def main(use_cuda, parallel, nn_type, combine):
     if not use_cuda and not parallel:
         save_dirname = "recognize_digits_" + nn_type + ".inference.model"
+        save_filename = None
+        if combine == True:
+            save_filename = "__params_combined__"
     else:
         save_dirname = None
+        save_filename = None
 
     train(
         nn_type=nn_type,
         use_cuda=use_cuda,
         parallel=parallel,
-        save_dirname=save_dirname)
-    infer(use_cuda=use_cuda, save_dirname=save_dirname)
+        save_dirname=save_dirname,
+        save_param_filename=save_filename)
+    infer(
+        use_cuda=use_cuda,
+        save_dirname=save_dirname,
+        param_filename=save_filename)
 
 
 class TestRecognizeDigits(unittest.TestCase):
     pass
 
 
-def inject_test_method(use_cuda, parallel, nn_type):
+def inject_test_method(use_cuda, parallel, nn_type, combine):
     def __impl__(self):
         prog = fluid.Program()
         startup_prog = fluid.Program()
         scope = fluid.core.Scope()
         with fluid.scope_guard(scope):
             with fluid.program_guard(prog, startup_prog):
-                main(use_cuda, parallel, nn_type)
+                main(use_cuda, parallel, nn_type, combine)
 
-    fn = 'test_{0}_{1}_{2}'.format(nn_type, 'cuda'
-                                   if use_cuda else 'cpu', 'parallel'
-                                   if parallel else 'normal')
+    fn = 'test_{0}_{1}_{2}_{3}'.format(nn_type, 'cuda'
+                                       if use_cuda else 'cpu', 'parallel'
+                                       if parallel else 'normal', 'combine'
+                                       if combine else 'separate')
 
     setattr(TestRecognizeDigits, fn, __impl__)
 
@@ -221,7 +232,10 @@ def inject_all_tests():
     for use_cuda in (False, True):
         for parallel in (False, True):
             for nn_type in ('mlp', 'conv'):
-                inject_test_method(use_cuda, parallel, nn_type)
+                inject_test_method(use_cuda, parallel, nn_type, True)
+
+    # One unit-test for saving parameters as separate files
+    inject_test_method(False, False, 'mlp', False)
 
 
 inject_all_tests()
-- 
GitLab