diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc
index 1c6c0454275f7bb006b6c83b4ad6be288eeb7b83..3e6cd8f76adae0d2da62491442164965df8983d3 100644
--- a/paddle/operators/fc_op.cc
+++ b/paddle/operators/fc_op.cc
@@ -26,7 +26,7 @@ class FCOp : public NetOp {
       : NetOp(type, inputs, outputs, attrs) {
     auto x = Inputs("X");
     auto w = Inputs("W");
-    auto mul_out = Outputs("mul_out");
+    auto mul_out = Outputs("MulOut");
     PADDLE_ENFORCE_EQ(
         x.size(), w.size(),
         "The size of inputs X(%d) should be the same as that of weights W(%d).",
@@ -36,36 +36,51 @@ class FCOp : public NetOp {
                       "as that of inputs X(%d).",
                       mul_out.size(), x.size());
 
-    int n = x.size();
-    PADDLE_ENFORCE_GE(n, 1,
+    size_t n = x.size();
+    PADDLE_ENFORCE_GE(n, static_cast<size_t>(1),
                       "The size of inputs X(%d) should be no less than 1.", n);
 
+    auto x_num_col_dims = Attr<std::vector<int>>("xNumColDims");
+    auto w_num_col_dims = Attr<std::vector<int>>("wNumColDims");
+    PADDLE_ENFORCE_EQ(x_num_col_dims.size(), n,
+                      "The size of attribute xNumColDims(%d) should be the "
+                      "same as that of inputs X(%d).",
+                      x_num_col_dims.size(), n);
+    PADDLE_ENFORCE_EQ(w_num_col_dims.size(), n,
+                      "The size of attribute wNumColDims(%d) should be the "
+                      "same as that of inputs X(%d).",
+                      w_num_col_dims.size(), n);
+
     // mul_out[i] = X[i] * W[i]
-    for (int i = 0; i < n; i++) {
-      AppendOp(framework::OpRegistry::CreateOp(
-          "mul", {{"X", {x[i]}}, {"Y", {w[i]}}}, {{"Out", {mul_out[i]}}}, {}));
+    for (size_t i = 0; i < n; i++) {
+      framework::AttributeMap mul_attr;
+      mul_attr["x_num_col_dims"] = static_cast<int>(x_num_col_dims[i]);
+      mul_attr["y_num_col_dims"] = static_cast<int>(w_num_col_dims[i]);
+      AppendOp(
+          framework::OpRegistry::CreateOp("mul", {{"X", {x[i]}}, {"Y", {w[i]}}},
+                                          {{"Out", {mul_out[i]}}}, mul_attr));
     }
 
     // sum_out = X[0] * W[0] + ... + X[n-1] * W[n-1]
     if (n > 1) {
       AppendOp(framework::OpRegistry::CreateOp(
-          "sum", {{"X", {mul_out}}}, {{"Out", {Output("sum_out")}}}, {}));
+          "sum", {{"X", {mul_out}}}, {{"Out", {Output("SumOut")}}}, {}));
     } else {
       AppendOp(framework::OpRegistry::CreateOp(
-          "identity", {{"X", {mul_out[0]}}}, {{"Y", {Output("sum_out")}}}, {}));
+          "identity", {{"X", {mul_out[0]}}}, {{"Y", {Output("SumOut")}}}, {}));
     }
 
     // add_out = sum_out + b
-    auto b = Input("b");
-    std::string add_out = "sum_out";
+    auto b = Input("B");
+    std::string add_out = "SumOut";
     if (b != framework::kEmptyVarName) {
-      add_out = "add_out";
+      add_out = "AddOut";
       AppendOp(framework::OpRegistry::CreateOp(
-          "rowwise_add", {{"X", {Output("sum_out")}}, {"b", {Input("b")}}},
+          "rowwise_add", {{"X", {Output("SumOut")}}, {"b", {Input("B")}}},
           {{"Out", {Output(add_out)}}}, {}));
     } else {
-      if (Output("add_out") != framework::kEmptyVarName) {
-        this->Rename(Output("add_out"), framework::kEmptyVarName);
+      if (Output("AddOut") != framework::kEmptyVarName) {
+        this->Rename(Output("AddOut"), framework::kEmptyVarName);
       }
     }
 
@@ -84,24 +99,34 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsDuplicable();
     AddInput("W", "The weights of FC operator, a ordered vector of 2-D matrix.")
         .AsDuplicable();
-    AddInput("b", "The 1-D bias vector of FC operator");
+    AddInput("B", "The 1-D bias vector of FC operator");
 
     AddOutput("Y", "The activated output matrix of FC operator");
-    AddOutput("mul_out",
+    AddOutput("MulOut",
               "The intermediate outputs of FC operator, "
               "saving the product of X[i] * W[i]")
         .AsIntermediate()
         .AsDuplicable();
-    AddOutput("sum_out",
+    AddOutput("SumOut",
               "The intermediate output of FC operator, "
               "saving the sum of products, sum(X[i] * W[i])")
         .AsIntermediate();
-    AddOutput("add_out",
+    AddOutput("AddOut",
               "The non-actived output of FC operator, saving X * W + b")
         .AsIntermediate();
     AddAttr<std::string>("activation", "The activation type of FC operator.")
         .SetDefault("identity")
         .InEnum({"identity", "sigmoid", "softmax"});
+    AddAttr<std::vector<int>>(
+        "xNumColDims",
+        "(std::vector<int>) xNumColDims[i] is passed to the i-th mul operator "
+        "as x_num_col_dims: the number of leading dimensions of X[i] that are "
+        "flattened into the rows of its 2-D matrix form.");
+    AddAttr<std::vector<int>>(
+        "wNumColDims",
+        "(std::vector<int>) wNumColDims[i] is passed to the i-th mul operator "
+        "as y_num_col_dims: the number of leading dimensions of W[i] that are "
+        "flattened into the rows of its 2-D matrix form.");
 
     AddComment(R"DOC(
 Fully Connected Operator, known as Fully Connected Layer or Inner Product Layer
diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py
index 00c487099710a702541fcdface9f71308724abc7..39906c8b332219d324ac04e9d772b6c56f71894b 100644
--- a/python/paddle/v2/framework/tests/test_fc_op.py
+++ b/python/paddle/v2/framework/tests/test_fc_op.py
@@ -5,52 +5,61 @@ from op_test import OpTest
 
 class TestFCOp1(OpTest):
     def setUp(self):
-        self.op_type = "fc"
         x0 = np.random.random((16, 32)).astype("float32")
         w0 = np.random.random((32, 10)).astype("float32")
         b = np.random.random(10).astype("float32")
-        self.inputs = {"X": [("X0", x0)], "W": [("W0", w0)], "b": b}
+        # Expected values of every fc output, computed with numpy.
         mul_out0 = np.dot(x0, w0)
         sum_out = mul_out0
         add_out = sum_out + b
         identity_out = add_out
+        # Operator type, inputs, expected outputs and attributes for OpTest.
+        self.op_type = "fc"
+        self.inputs = {"X": [("X0", x0)], "W": [("W0", w0)], "B": b}
         self.outputs = {
-            "mul_out": [("mul_out0", mul_out0)],
-            "sum_out": sum_out,
-            "add_out": add_out,
+            "MulOut": [("MulOut0", mul_out0)],
+            "SumOut": sum_out,
+            "AddOut": add_out,
             "Y": identity_out
         }
+        self.attrs = {"xNumColDims": [1], "wNumColDims": [1]}
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(["X0", "W0", "b"], "Y", max_relative_error=0.01)
+        self.check_grad(["X0", "W0", "B"], "Y", max_relative_error=0.01)
 
 
 class TestFCOp2(OpTest):
     def setUp(self):
-        self.op_type = "fc"
-        x0 = np.random.random((16, 32)).astype("float32")
+        x0 = np.random.random((16, 4, 8)).astype("float32")
         x1 = np.random.random((16, 32)).astype("float32")
         w0 = np.random.random((32, 10)).astype("float32")
-        w1 = np.random.random((32, 10)).astype("float32")
+        w1 = np.random.random((4, 8, 10)).astype("float32")
         b = np.random.random(10).astype("float32")
+        # Expected outputs; 3-D x0 and w1 are reshaped to 2-D before np.dot.
+        mul_out0 = np.dot(x0.reshape(16, 4 * 8), w0)
+        mul_out1 = np.dot(x1, w1.reshape(4 * 8, 10))
+        sum_out = mul_out0 + mul_out1
+        add_out = np.add(sum_out, b)
+        sigmoid_out = 1 / (1 + np.exp(-add_out))
+        # Operator type, inputs, attributes and expected outputs for OpTest.
+        self.op_type = "fc"
         self.inputs = {
             "X": [("X0", x0), ("X1", x1)],
             "W": [("W0", w0), ("W1", w1)],
-            "b": b
+            "B": b
+        }
+        self.attrs = {
+            "xNumColDims": [1, 1],
+            "wNumColDims": [1, 2],
+            "activation": "sigmoid"
         }
-        self.attrs = {"activation": "sigmoid"}
-        mul_out0 = np.dot(x0, w0)
-        mul_out1 = np.dot(x1, w1)
-        sum_out = mul_out0 + mul_out1
-        add_out = np.add(sum_out, b)
-        sigmoid_out = 1 / (1 + np.exp(-add_out))
         self.outputs = {
-            "mul_out": [("mul_out0", mul_out0), ("mul_out1", mul_out1)],
-            "sum_out": sum_out,
-            "add_out": add_out,
+            "MulOut": [("MulOut0", mul_out0), ("MulOut1", mul_out1)],
+            "SumOut": sum_out,
+            "AddOut": add_out,
             "Y": sigmoid_out
         }
 
@@ -59,7 +68,7 @@ class TestFCOp2(OpTest):
 
     def test_check_grad(self):
         self.check_grad(
-            ["X0", "X1", "W0", "W1", "b"], "Y", max_relative_error=0.01)
+            ["X0", "X1", "W0", "W1", "B"], "Y", max_relative_error=0.01)
 
 
 if __name__ == '__main__':