From 4f2ee63c4466e018b9a1196281c1a059094698f3 Mon Sep 17 00:00:00 2001
From: Liu Yiqun <liuyiqun01@baidu.com>
Date: Mon, 11 Sep 2017 11:10:25 +0000
Subject: [PATCH] Stop calling in-place ops in FCOp.

---
 paddle/operators/fc_op.cc                     | 57 +++++++++-------
 .../paddle/v2/framework/tests/test_fc_op.py   | 65 ++++++++++++++-----
 2 files changed, 84 insertions(+), 38 deletions(-)

diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc
index 6e6a09bc3fb..1c6c0454275 100644
--- a/paddle/operators/fc_op.cc
+++ b/paddle/operators/fc_op.cc
@@ -26,38 +26,43 @@ class FCOp : public NetOp {
       : NetOp(type, inputs, outputs, attrs) {
     auto x = Inputs("X");
     auto w = Inputs("W");
+    auto mul_out = Outputs("mul_out");
     PADDLE_ENFORCE_EQ(
         x.size(), w.size(),
         "The size of inputs X(%d) should be the same as that of weights W(%d).",
         x.size(), w.size());
+    PADDLE_ENFORCE_EQ(mul_out.size(), x.size(),
+                      "The size of intermediate mul_out(%d) should be the same "
+                      "as that of inputs X(%d).",
+                      mul_out.size(), x.size());
 
     int n = x.size();
     PADDLE_ENFORCE_GE(n, 1,
                       "The size of inputs X(%d) should be no less than 1.", n);
 
-    // mul_out = X[0] * W[0] + ... + X[n-1] * W[n-1]
-    AppendOp(
-        framework::OpRegistry::CreateOp("mul", {{"X", {x[0]}}, {"Y", {w[0]}}},
-                                        {{"Out", {Output("mul_out")}}}, {}));
+    // mul_out[i] = X[i] * W[i]
+    for (int i = 0; i < n; i++) {
+      AppendOp(framework::OpRegistry::CreateOp(
+          "mul", {{"X", {x[i]}}, {"Y", {w[i]}}}, {{"Out", {mul_out[i]}}}, {}));
+    }
 
-    for (int i = 1; i < n; i++) {
-      // mul_out = mul_out + X[i] * W[i]
-      AppendOp(
-          framework::OpRegistry::CreateOp("mul", {{"X", {x[i]}}, {"Y", {w[i]}}},
-                                          {{"Out", {Output("add_out")}}}, {}));
+    // sum_out = X[0] * W[0] + ... + X[n-1] * W[n-1]
+    if (n > 1) {
+      AppendOp(framework::OpRegistry::CreateOp(
+          "sum", {{"X", {mul_out}}}, {{"Out", {Output("sum_out")}}}, {}));
+    } else {
       AppendOp(framework::OpRegistry::CreateOp(
-          "add", {{"X", {Output("mul_out")}}, {"Y", {Output("add_out")}}},
-          {{"Out", {Output("mul_out")}}}, {}));
+          "identity", {{"X", {mul_out[0]}}}, {{"Y", {Output("sum_out")}}}, {}));
     }
 
+    // add_out = sum_out + b
     auto b = Input("b");
-    std::string add_out = "mul_out";
+    std::string add_out = "sum_out";
     if (b != framework::kEmptyVarName) {
-      // add_out = mul_out + b
-      AppendOp(framework::OpRegistry::CreateOp(
-          "rowwise_add", {{"X", {Output("mul_out")}}, {"b", {Input("b")}}},
-          {{"Out", {Output("add_out")}}}, {}));
       add_out = "add_out";
+      AppendOp(framework::OpRegistry::CreateOp(
+          "rowwise_add", {{"X", {Output("sum_out")}}, {"b", {Input("b")}}},
+          {{"Out", {Output(add_out)}}}, {}));
     } else {
       if (Output("add_out") != framework::kEmptyVarName) {
         this->Rename(Output("add_out"), framework::kEmptyVarName);
@@ -68,8 +73,6 @@ class FCOp : public NetOp {
     AppendOp(framework::OpRegistry::CreateOp(
         activation, {{"X", {Output(add_out)}}}, {{"Y", {Output("Y")}}}, {}));
     CompleteAddOp(false);
-
-    std::cout << DebugString() << std::endl;
   }
 };
 
@@ -77,14 +80,24 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   FCOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The 2-D input matrix of FC operator.").AsDuplicable();
-    AddInput("W", "The 2-D weight matrix of FC operator.").AsDuplicable();
+    AddInput("X", "The inputs of FC operator, a ordered vector of 2-D matrix.")
+        .AsDuplicable();
+    AddInput("W", "The weights of FC operator, a ordered vector of 2-D matrix.")
+        .AsDuplicable();
     AddInput("b", "The 1-D bias vector of FC operator");
 
     AddOutput("Y", "The activated output matrix of FC operator");
-    AddOutput("mul_out", "The non-actived output of FC operator, X * W")
+    AddOutput("mul_out",
+              "The intermediate outputs of FC operator, "
+              "saving the product of X[i] * W[i]")
+        .AsIntermediate()
+        .AsDuplicable();
+    AddOutput("sum_out",
+              "The intermediate output of FC operator, "
+              "saving the sum of products, sum(X[i] * W[i])")
         .AsIntermediate();
-    AddOutput("add_out", "The non-actived output of FC operator, X * W + b")
+    AddOutput("add_out",
+              "The non-actived output of FC operator, saving X * W + b")
         .AsIntermediate();
     AddAttr<std::string>("activation", "The activation type of FC operator.")
         .SetDefault("identity")
diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py
index 959bd7e405b..4355191223a 100644
--- a/python/paddle/v2/framework/tests/test_fc_op.py
+++ b/python/paddle/v2/framework/tests/test_fc_op.py
@@ -3,33 +3,65 @@ import numpy as np
 from op_test import OpTest
 
 
-class TestFCOp(OpTest):
+class TestFCOp1(OpTest):
     def setUp(self):
-        print "Run"
         self.op_type = "fc"
-        x0 = np.random.random((32, 256)).astype("float32")
-        x1 = np.random.random((32, 256)).astype("float32")
-        w0 = np.random.random((256, 100)).astype("float32")
-        w1 = np.random.random((256, 100)).astype("float32")
-        b = np.random.random(100).astype("float32")
+        x1 = np.random.random((16, 32)).astype("float32")
+        w1 = np.random.random((32, 10)).astype("float32")
+        b = np.random.random(10).astype("float32")
+        self.inputs = {"X": {"X1": x1}, "W": {"W1": w1}, "b": b}
+        mul_out1 = np.dot(x1, w1)
+        sum_out = mul_out1
+        add_out = sum_out + b
+        identity_out = add_out
+        self.outputs = {
+            "mul_out": {
+                "mul_out1": mul_out1,
+            },
+            "sum_out": sum_out,
+            "add_out": add_out,
+            "Y": identity_out
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["X1", "W1", "b"], "Y", max_relative_error=0.05)
+
+
+class TestFCOp2(OpTest):
+    def setUp(self):
+        self.op_type = "fc"
+        x1 = np.random.random((16, 32)).astype("float32")
+        x2 = np.random.random((16, 32)).astype("float32")
+        w1 = np.random.random((32, 10)).astype("float32")
+        w2 = np.random.random((32, 10)).astype("float32")
+        b = np.random.random(10).astype("float32")
         self.inputs = {
             "X": {
-                "X0": x0,
-                "X1": x1
+                "X1": x1,
+                "X2": x2
             },
             "W": {
-                "W0": w0,
-                "W1": w1
+                "W1": w1,
+                "W2": w2
             },
             "b": b
         }
         #self.attrs = {"activation": "sigmoid"}
-        mul_out = np.dot(x0, w0) + np.dot(x1, w1)
-        add_out = np.add(mul_out, b)
+        mul_out1 = np.dot(x1, w1)
+        mul_out2 = np.dot(x2, w2)
+        sum_out = mul_out1 + mul_out2
+        add_out = np.add(sum_out, b)
         #sigmoid_out = 1 / (1 + np.exp(-add_out))
         sigmoid_out = add_out
         self.outputs = {
-            "mul_out": mul_out,
+            "mul_out": {
+                "mul_out0": mul_out1,
+                "mul_out1": mul_out2
+            },
+            "sum_out": sum_out,
             "add_out": add_out,
             "Y": sigmoid_out
         }
@@ -37,8 +69,9 @@ class TestFCOp(OpTest):
     def test_check_output(self):
         self.check_output()
 
-    #def test_check_grad(self):
-    #    self.check_grad(["X0", "X1", "W0", "W1", "b"], "Y")
+    def test_check_grad(self):
+        self.check_grad(
+            ["X1", "X2", "W1", "W2", "b"], "Y", max_relative_error=0.05)
 
 
 if __name__ == '__main__':
-- 
GitLab