From fca2f65a11a392c02e3e8272a8e1fd40b729b056 Mon Sep 17 00:00:00 2001
From: rensilin <rensilin@baidu.com>
Date: Thu, 15 Aug 2019 14:56:49 +0800
Subject: [PATCH] create_programs

Change-Id: Idc1bc680b045084c417d82dacbd39c0e1bd199a0
---
 .../custom_trainer/feed/executor/executor.cc  |   4 +-
 .../feed/io/auto_file_system.cc               |  14 ---
 .../custom_trainer/feed/io/file_system.cc     |  14 ---
 .../custom_trainer/feed/io/file_system.h      |  14 ---
 .../feed/io/hadoop_file_system.cc             |  14 ---
 .../feed/io/local_file_system.cc              |  14 ---
 .../train/custom_trainer/feed/io/shell.cc     |  14 ---
 .../train/custom_trainer/feed/io/shell.h      |  14 ---
 .../feed/scripts/create_programs.py           |  87 +++++++++++++
 .../custom_trainer/feed/scripts/example.py    |  33 +++++
 .../feed/unit_test/test_create_programs.cc    | 118 ++++++++++++++++++
 .../feed/unit_test/test_executor.cc           |   3 +-
 12 files changed, 242 insertions(+), 101 deletions(-)
 create mode 100644 paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py
 create mode 100644 paddle/fluid/train/custom_trainer/feed/scripts/example.py
 create mode 100644 paddle/fluid/train/custom_trainer/feed/unit_test/test_create_programs.cc
diff --git a/paddle/fluid/train/custom_trainer/feed/executor/executor.cc b/paddle/fluid/train/custom_trainer/feed/executor/executor.cc
index a7d5bdf2..1e8861de 100644
--- a/paddle/fluid/train/custom_trainer/feed/executor/executor.cc
+++ b/paddle/fluid/train/custom_trainer/feed/executor/executor.cc
@@ -54,8 +54,6 @@ public:
         paddle::framework::InitDevices(false);
         if (exe_config["num_threads"]) {
             paddle::platform::SetNumThreads(exe_config["num_threads"].as<int>());
-        } else {
-            paddle::platform::SetNumThreads(1);
         }
 
         if (!exe_config["startup_program"] || 
@@ -80,6 +78,8 @@ public:
                 return -1;
             }
             _context->prepare_context = _context->executor.Prepare(*_context->main_program, 0);
+
+
             _context->executor.CreateVariables(*_context->main_program, this->scope(), 0);
         } catch (::paddle::platform::EnforceNotMet& err) {
             VLOG(2) << err.what();
diff --git a/paddle/fluid/train/custom_trainer/feed/io/auto_file_system.cc b/paddle/fluid/train/custom_trainer/feed/io/auto_file_system.cc
index 16bbfed5..1d55cabc 100644
--- a/paddle/fluid/train/custom_trainer/feed/io/auto_file_system.cc
+++ b/paddle/fluid/train/custom_trainer/feed/io/auto_file_system.cc
@@ -1,17 +1,3 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
 #include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
 
 #include <string>
diff --git a/paddle/fluid/train/custom_trainer/feed/io/file_system.cc b/paddle/fluid/train/custom_trainer/feed/io/file_system.cc
index 2014cd23..61949955 100644
--- a/paddle/fluid/train/custom_trainer/feed/io/file_system.cc
+++ b/paddle/fluid/train/custom_trainer/feed/io/file_system.cc
@@ -1,17 +1,3 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
 #include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
 #include <string>
 
diff --git a/paddle/fluid/train/custom_trainer/feed/io/file_system.h b/paddle/fluid/train/custom_trainer/feed/io/file_system.h
index d7aa1cc2..482ab30d 100644
--- a/paddle/fluid/train/custom_trainer/feed/io/file_system.h
+++ b/paddle/fluid/train/custom_trainer/feed/io/file_system.h
@@ -1,17 +1,3 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
 #pragma once
 
 #include <memory>
diff --git a/paddle/fluid/train/custom_trainer/feed/io/hadoop_file_system.cc b/paddle/fluid/train/custom_trainer/feed/io/hadoop_file_system.cc
index d05affc5..d0be197b 100644
--- a/paddle/fluid/train/custom_trainer/feed/io/hadoop_file_system.cc
+++ b/paddle/fluid/train/custom_trainer/feed/io/hadoop_file_system.cc
@@ -1,17 +1,3 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
 #include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
 
 #include <string>
diff --git a/paddle/fluid/train/custom_trainer/feed/io/local_file_system.cc b/paddle/fluid/train/custom_trainer/feed/io/local_file_system.cc
index 287d3e0a..7fb4eaa8 100644
--- a/paddle/fluid/train/custom_trainer/feed/io/local_file_system.cc
+++ b/paddle/fluid/train/custom_trainer/feed/io/local_file_system.cc
@@ -1,17 +1,3 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
 #include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
 
 #include <string>
diff --git a/paddle/fluid/train/custom_trainer/feed/io/shell.cc b/paddle/fluid/train/custom_trainer/feed/io/shell.cc
index 6ed61d67..a96da2c6 100644
--- a/paddle/fluid/train/custom_trainer/feed/io/shell.cc
+++ b/paddle/fluid/train/custom_trainer/feed/io/shell.cc
@@ -1,17 +1,3 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
 #include "paddle/fluid/train/custom_trainer/feed/io/shell.h"
 
 namespace paddle {
diff --git a/paddle/fluid/train/custom_trainer/feed/io/shell.h b/paddle/fluid/train/custom_trainer/feed/io/shell.h
index 930d64d5..7eca3d8e 100644
--- a/paddle/fluid/train/custom_trainer/feed/io/shell.h
+++ b/paddle/fluid/train/custom_trainer/feed/io/shell.h
@@ -1,17 +1,3 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
 #pragma once
 
 #include <fcntl.h>
diff --git a/paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py b/paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py
new file mode 100644
index 00000000..b2ba57ab
--- /dev/null
+++ b/paddle/fluid/train/custom_trainer/feed/scripts/create_programs.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, division
+import os
+import sys
+import paddle
+from paddle import fluid
+import yaml
+
+def print_help(this_name):
+    """Print the usage line and an example invocation; this_name is the script path shown in both.
+    """
+    dirname = os.path.dirname(this_name)
+    print("Usage: {} <network building filename> [model_dir]\n".format(this_name))
+    print("    example: {} {}".format(this_name, os.path.join(dirname, 'example.py')))
+
+def inference(filename):
+    """Build the inference network (no loss, no optimizer) defined in `filename`.
+    Args:
+        filename: path of a Python file that defines a zero-argument `inference()`
+    Returns:
+        (inputs, ctr_output): list<Variable> of inputs and the output Variable
+    """
+    with open(filename, 'r') as f:
+        code = f.read()
+    # Run the file in a copy of our globals: on Python 3 a bare exec() cannot rebind
+    # names for this function, so `inference()` would resolve to this loader itself.
+    namespace = {k: v for k, v in globals().items() if k != 'inference'}
+    exec(compile(code, filename, 'exec'), namespace)
+    return namespace['inference']()
+
+def main(argv):
+    """Build the startup/main/test programs and save them with a model.yaml description.
+    Args:
+        argv: [script, network_building_file, optional model_dir (default './model')]
+    """
+    if len(argv) < 2 or not os.path.exists(argv[1]):
+        print_help(argv[0])
+        sys.exit(1)
+    network_build_file = argv[1]
+
+    # model_dir is the optional third element, so argv[2] is only valid when len(argv) > 2.
+    model_dir = argv[2] if len(argv) > 2 else './model'
+
+    main_program = fluid.Program()
+    startup_program = fluid.Program()
+    with fluid.program_guard(main_program, startup_program):
+        inputs, ctr_output = inference(network_build_file)
+
+        # Clone for testing before the label, loss and backward ops are appended below.
+        test_program = main_program.clone(for_test=True)
+
+        label_target = fluid.layers.data(name='label', shape=[1], dtype='float32')
+
+        loss = fluid.layers.square_error_cost(input=ctr_output, label=label_target)
+        loss = fluid.layers.mean(loss, name='loss')
+
+        optimizer = fluid.optimizer.SGD(learning_rate=1.0)
+        params_grads = optimizer.backward(loss)
+
+    if not os.path.exists(model_dir):
+        os.mkdir(model_dir)
+
+    programs = {
+        'startup_program': startup_program,
+        'main_program': main_program,
+        'test_program': test_program,
+    }
+    for save_path, program in programs.items():
+        # The serialized program desc is binary data, so the file is opened in binary mode.
+        with open(os.path.join(model_dir, save_path), 'wb') as f:
+            f.write(program.desc.serialize_to_string())
+
+    model_desc_path = os.path.join(model_dir, 'model.yaml')
+    model_desc = dict()
+    model_desc['inputs'] = {var.name: var.shape for var in inputs}
+    model_desc['loss_name'] = loss.name
+    model_desc['label_name'] = label_target.name
+    model_desc['ctr_output_name'] = ctr_output.name
+
+    with open(model_desc_path, 'wb') as f:  # safe_dump emits encoded bytes when encoding is set
+        yaml.safe_dump(model_desc, f, encoding='utf-8', allow_unicode=True)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/paddle/fluid/train/custom_trainer/feed/scripts/example.py b/paddle/fluid/train/custom_trainer/feed/scripts/example.py
new file mode 100644
index 00000000..8a76a418
--- /dev/null
+++ b/paddle/fluid/train/custom_trainer/feed/scripts/example.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+"""
+This is an example of network building
+"""
+
+from __future__ import print_function, division
+import paddle
+from paddle import fluid
+
+def inference():
+    """Build the example inference network (without loss and optimizer)
+
+    Returns:
+        list<Variable>: inputs (here only `cvm_input`, float32 with shape [4488])
+        and
+        Variable: ctr_output (output of the final 1-unit sigmoid fc layer)
+    """
+    # TODO: build network here
+    cvm_input = fluid.layers.data(name='cvm_input', shape=[4488], dtype='float32')
+
+    net = cvm_input
+    net = fluid.layers.fc(net, 512, act='relu')
+    net = fluid.layers.fc(net, 256, act='relu')
+    net = fluid.layers.fc(net, 256, act='relu')
+    net = fluid.layers.fc(net, 128, act='relu')
+    net = fluid.layers.fc(net, 128, act='relu')
+    net = fluid.layers.fc(net, 128, act='relu')
+    net = fluid.layers.fc(net, 128, act='relu')
+
+    ctr_output = fluid.layers.fc(net, 1, act='sigmoid', name='ctr_output')
+    return [cvm_input], ctr_output
diff --git a/paddle/fluid/train/custom_trainer/feed/unit_test/test_create_programs.cc b/paddle/fluid/train/custom_trainer/feed/unit_test/test_create_programs.cc
new file mode 100644
index 00000000..1884f43e
--- /dev/null
+++ b/paddle/fluid/train/custom_trainer/feed/unit_test/test_create_programs.cc
@@ -0,0 +1,118 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include <fstream>
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/train/custom_trainer/feed/io/file_system.h"
+#include "paddle/fluid/train/custom_trainer/feed/io/shell.h"
+#include "paddle/fluid/string/string_helper.h"
+
+namespace paddle {
+namespace custom_trainer {
+namespace feed {
+
+namespace {
+const char test_data_dir[] = "test_data";
+const char main_program_path[] = "test_data/main_program";
+const char startup_program_path[] = "test_data/startup_program";
+const char model_desc_path[] = "test_data/model.yaml";
+}
+
+class CreateProgramsTest : public testing::Test
+{
+public:
+    static void SetUpTestCase()  // runs create_programs.py on example.py, writing into test_data_dir
+    {
+        shell_execute(string::format_string("python scripts/create_programs.py scripts/example.py %s", test_data_dir));
+    }
+
+    static void TearDownTestCase()  // removes the generated test_data_dir
+    {
+        std::unique_ptr<FileSystem> fs(CREATE_CLASS(FileSystem, "LocalFileSystem"));
+        fs->remove(test_data_dir);
+    }
+
+    virtual void SetUp()  // gives each test a fresh TrainerContext
+    {
+        context_ptr.reset(new TrainerContext());
+    }
+
+    virtual void TearDown()  // releases the per-test context
+    {
+        context_ptr = nullptr;
+    }
+
+    std::shared_ptr<TrainerContext> context_ptr;  // handed to Executor::initialize by the tests
+};
+
+TEST_F(CreateProgramsTest, example_network) {
+    std::unique_ptr<Executor> executor(CREATE_CLASS(Executor, "SimpleExecutor"));
+    ASSERT_NE(nullptr, executor);
+
+    // executor.cc looks up its thread count under the "num_threads" key.
+    auto config = YAML::Load(string::format_string("{num_threads: 2, startup_program: %s, main_program: %s}", startup_program_path, main_program_path));
+    auto model_desc = YAML::LoadFile(model_desc_path);
+    ASSERT_EQ(0, executor->initialize(config, context_ptr));
+
+    std::string input_name = "cvm_input";
+    ASSERT_TRUE(model_desc["inputs"]);
+    ASSERT_TRUE(model_desc["inputs"][input_name]);
+    ASSERT_TRUE(model_desc["loss_name"]);
+    ASSERT_TRUE(model_desc["label_name"]);
+    ASSERT_TRUE(model_desc["ctr_output_name"]);
+    auto loss_name = model_desc["loss_name"].as<std::string>();
+    auto label_name = model_desc["label_name"].as<std::string>();
+    auto ctr_output_name = model_desc["ctr_output_name"].as<std::string>();
+
+    std::vector<int> input_shape = model_desc["inputs"][input_name].as<std::vector<int>>(std::vector<int>());
+    ASSERT_EQ(2u, input_shape.size());
+    ASSERT_EQ(-1, input_shape[0]);
+    ASSERT_EQ(4488, input_shape[1]);
+
+    auto input_var = executor->mutable_var<::paddle::framework::LoDTensor>(input_name);
+    auto label_var = executor->mutable_var<::paddle::framework::LoDTensor>(label_name);
+    ASSERT_NE(nullptr, input_var);
+    ASSERT_NE(nullptr, label_var);
+
+    // Feed a single sample: every input feature is 0.1 and the label is 0.5.
+    input_var->Resize({1, input_shape[1]});
+    auto input_data = input_var->mutable_data<float>(context_ptr->cpu_place);
+    ASSERT_NE(nullptr, input_data);
+    for (int i = 0; i < input_shape[1]; ++i) {
+        input_data[i] = 0.1f;
+    }
+
+    label_var->Resize({1, 1});
+    auto label_data = label_var->mutable_data<float>(context_ptr->cpu_place);
+    ASSERT_NE(nullptr, label_data);
+    label_data[0] = 0.5f;
+
+    ASSERT_EQ(0, executor->run());
+    const auto& loss_var = executor->var<::paddle::framework::LoDTensor>(loss_name);
+    auto loss = loss_var.data<float>()[0];
+    const auto& ctr_output_var = executor->var<::paddle::framework::LoDTensor>(ctr_output_name);
+    auto ctr_output = ctr_output_var.data<float>()[0];
+
+    std::cout << "loss: " << loss << std::endl;
+    std::cout << "ctr_output: " << ctr_output << std::endl;
+}
+
+}  // namespace feed
+}  // namespace custom_trainer
+}  // namespace paddle
diff --git a/paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc b/paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc
index 5866b34e..e1cb5fbf 100644
--- a/paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc
+++ b/paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc
@@ -61,6 +61,7 @@ public:
             op->SetInput("X", {"x"});
             op->SetOutput("Out", {"mean"});
             op->CheckAttrs();
+            load_block->Var("mean");
             std::ofstream fout(main_program_path, std::ios::out | std::ios::binary);
             ASSERT_TRUE(fout);
             fout << main_program->Proto()->SerializeAsString();
@@ -106,12 +107,12 @@ TEST_F(SimpleExecutorTest, run) {
     ASSERT_EQ(0, executor->initialize(config, context_ptr));
     
 	auto x_var = executor->mutable_var<::paddle::framework::LoDTensor>("x");
-    executor->mutable_var<::paddle::framework::LoDTensor>("mean");
     ASSERT_NE(nullptr, x_var);
 
     int x_len = 10;
 	x_var->Resize({1, x_len});
 	auto x_data = x_var->mutable_data<float>(context_ptr->cpu_place);
+    ASSERT_NE(nullptr, x_data);
     std::cout << "x: ";
     for (int i = 0; i < x_len; ++i) {
         x_data[i] = i;
-- 
GitLab