diff --git a/src/io.cpp b/src/io.cpp
index 0b02073b5a8bf2601f2bacdb0e03fb36648a3b92..fbf5798fa297622f6963d4352d65ca16ae54d5df 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -218,7 +218,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
     }
   }
 
-  originProgramDesc->Description("program: ");
+  //  originProgramDesc->Description("program: ");
 
   paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
   return program;
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index b144415e51ff1af9b3362df19de2dab20f38b78c..39f11dd708c56c550a41545f5e4bf93b78b7fa51 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -42,8 +42,8 @@ class FusionFcOp {
  private:
 };
 
-static framework::FusionOpRegistrar fc_registrar(
-    new FushionConvAddReluOpMatcher());
+// static framework::FusionOpRegistrar fc_registrar(
+//    new FushionConvAddReluOpMatcher());
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/fusion_fc_op.cpp b/src/operators/fusion_fc_op.cpp
index 17273a05b7bb5441c4522fa9ba383924056a1087..0f1be5c29fee1f741b773bbfa11b50b5aa49b8b7 100644
--- a/src/operators/fusion_fc_op.cpp
+++ b/src/operators/fusion_fc_op.cpp
@@ -12,4 +12,45 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "fusion_fc_op.h"
+#include "operators/fusion_fc_op.h"
+namespace paddle_mobile {
+namespace operators {
+
+// Derives Out's shape: the first x_num_col_dims dims of X followed by the
+// dims of Y from index y_num_col_dims onward.
+template <typename Dtype, typename T>
+void FushionFcOp<Dtype, T>::InferShape() const {
+  auto x_shape = param_.InputX()->dims();
+  auto y_shape = param_.InputY()->dims();
+  const int x_split = param_.XNumColDims();
+  const int y_split = param_.YNumColDims();
+
+  assert(x_shape.size() > x_split);
+  assert(y_shape.size() > y_split);
+
+  /// e.g. dims (1,2,3,4) with x_num_col_dims = 2 flatten to (2,12)
+  auto x_as_matrix = framework::flatten_to_2d(x_shape, x_split);
+  auto y_as_matrix = framework::flatten_to_2d(y_shape, y_split);
+  // The flattened operands must agree on the shared inner dimension.
+  assert(x_as_matrix[1] == y_as_matrix[0]);
+
+  std::vector<int64_t> out_shape;
+  out_shape.reserve(static_cast<size_t>(x_split + y_shape.size() - y_split));
+  // Leading dims are taken from X ...
+  for (int axis = 0; axis < x_split; ++axis) {
+    out_shape.push_back(x_shape[axis]);
+  }
+  // ... and trailing dims are taken from Y.
+  for (int axis = y_split; axis < y_shape.size(); ++axis) {
+    out_shape.push_back(y_shape[axis]);
+  }
+
+  param_.Out()->Resize(framework::make_ddim(out_shape));
+}
+template class FushionFcOp<CPU, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+USE_OP(fc);
+REGISTER_OPERATOR(fc, ops::FushionFcOp);
diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h
index e702ffb41c5f8749270c4ca1cf8b597f59be8e6e..1dd5d2bf535520c46ee838d1cf2945d988557a4c 100644
--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -18,6 +18,7 @@ limitations under the License. */
 
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/fushion_fc_kernel.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -38,9 +39,27 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
   std::string Type() { return "fc"; }
 };
 
-class FusionFcOp {
+template <typename DeviceType, typename T>
+class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
  public:
- private:
+  // Builds the op and binds its inputs, outputs and attrs into param_.
+  FushionFcOp(const std::string &type, const VariableNameMap &inputs,
+              const VariableNameMap &outputs,
+              const framework::AttributeMap &attrs,
+              std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}
+  // Runs a FushionFcKernel<DeviceType, T> on the bound parameters.
+  void Run() const {
+    operators::FushionFcKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+  }
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+ protected:
+  FushionFcParam param_;
 };
 
 static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
diff --git a/src/operators/kernel/arm/elementwise_add_kernel.cpp b/src/operators/kernel/arm/elementwise_add_kernel.cpp
index c2f92908a7a1ddb42b530890f140f1a0245e3723..f8d40ad17ff09d77c26a9f32a87190f1cdd6038a 100644
--- a/src/operators/kernel/arm/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/arm/elementwise_add_kernel.cpp
@@ -31,7 +31,7 @@ void ElementwiseAddKernel<CPU, float>::Compute(
   const Tensor *input_y = param.InputY();
   Tensor *Out = param.Out();
   Out->mutable_data<float>();
-  const int axis = param.Axis();
+  int axis = param.Axis();
   ElementwiseComputeEx<AddFunctor<float>, float>(input_x, input_y, axis,
                                                  AddFunctor<float>(), Out);
 }
diff --git a/src/operators/kernel/arm/fushion_fc_kernel.cpp b/src/operators/kernel/arm/fushion_fc_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ebec90aa27154334488329d079b76d14630e3294
--- /dev/null
+++ b/src/operators/kernel/arm/fushion_fc_kernel.cpp
@@ -0,0 +1,67 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+// CPU float specialization of the fused FC kernel.
+
+#include "operators/kernel/fushion_fc_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// CPU/float fused FC kernel: writes the bias Z into every row of Out and then
+// calls matmul with beta = 1, presumably yielding Out = X * Y + Z.
+template <>
+void FushionFcKernel<CPU, float>::Compute(const FushionFcParam &param) const {
+  const Tensor *input_x = param.InputX();
+  const Tensor *input_y = param.InputY();
+  const Tensor *input_z = param.InputZ();
+  auto *input_z_data = input_z->data<float>();
+  int axis = param.Axis();
+  Tensor *out = param.Out();
+  auto *out_data = out->mutable_data<float>();
+  // Inputs with more than two dims go through ReshapeToMatrix first.
+  const Tensor x_matrix =
+      input_x->dims().size() > 2
+          ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
+          : *input_x;
+  const Tensor y_matrix =
+      input_y->dims().size() > 2
+          ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
+          : *input_y;
+  if (out->dims().size() != 2) {
+    out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
+  }
+  // Read the dims only after the resize so the checks see the 2-D shape.
+  auto out_dim = out->dims();
+  PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2.");
+  PADDLE_MOBILE_ENFORCE(input_z->dims().size() == 1, "inpu_z size must be 1");
+  PADDLE_MOBILE_ENFORCE(out_dim[1] == input_z->dims()[0],
+                        " out_dim[1] must equal input_z length.");
+  axis = (axis == -1 ? out_dim.size() - input_z->dims().size() : axis);
+  PADDLE_MOBILE_ENFORCE(axis == 1, " to fit broadcast, axis = 1. ");
+
+  // Seed each output row with the bias vector.
+  const int64_t classes = input_z->numel();
+  for (int64_t row = 0; row < out_dim[0]; ++row) {
+    memory::Copy(out_data + row * classes, input_z_data,
+                 sizeof(float) * classes);
+  }
+  // alpha = 1, beta = 1: presumably accumulates the product onto the bias.
+  math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
+                      out, static_cast<float>(1));
+  // NOTE(review): Out is left 2-D; its pre-resize dims are not restored.
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
diff --git a/src/operators/kernel/fushion_fc_kernel.h b/src/operators/kernel/fushion_fc_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..7597a7120d1840128810730ad3fab11fd01b10fa
--- /dev/null
+++ b/src/operators/kernel/fushion_fc_kernel.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "framework/operator.h"
+#include "operators/math/math_function.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+// Kernel of the fused FC op; Compute is only declared in this class.
+template <typename DeviceType, typename T>
+class FushionFcKernel
+    : public framework::OpKernelBase<DeviceType, FushionFcParam> {
+ public:
+  void Compute(const FushionFcParam& param) const;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
diff --git a/src/operators/math/math_function.cpp b/src/operators/math/math_function.cpp
index b47d408a6fa5974856de701a083f426c8ff60109..59dd3e82d98334fd8aa86caa8f552936a6983900 100644
--- a/src/operators/math/math_function.cpp
+++ b/src/operators/math/math_function.cpp
@@ -41,8 +41,8 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
   int N = dim_out[1];
   int K = (trans_a == false) ? dim_a[1] : dim_a[0];
 
-  sgemm(M, N, K, 1, matrix_a.data<float>(), K, matrix_b.data<float>(), N, 0,
-        matrix_out->data<float>(), N);
+  sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
+        beta, matrix_out->data<float>(), N);
 }
 
 template <>
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 72b729b572093a3ae58751fc0dd7f4a05e938cf6..315ee92ccff5b0887e98d67cffe0d46f2110da1b 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -51,6 +51,11 @@ class OpParam : PaddleMobileObject {
     return GetVarValue<T>("Y", inputs, scope);
   }
 
+  // Delegates to GetVarValue<T> with the input key "Z".
+  template <typename T>
+  static T *InputZFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Z", inputs, scope);
+  }
   template <typename T>
   static T *InputBiasFrom(const VariableNameMap &inputs, const Scope &scope) {
     return GetVarValue<T>("Bias", inputs, scope);
@@ -703,5 +708,42 @@ class ReluParam : public OpParam {
   Tensor *out_;
 };
 
+// Parameter bundle for the fused FC op: inputs X, Y, Z, output Out and the
+// x_num_col_dims / y_num_col_dims / axis attributes.
+class FushionFcParam : public OpParam {
+ public:
+  FushionFcParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+                 const AttributeMap &attrs, const Scope &scope)
+      : input_x_(InputXFrom<Tensor>(inputs, scope)),
+        input_y_(InputYFrom<Tensor>(inputs, scope)),
+        input_z_(InputZFrom<Tensor>(inputs, scope)),
+        out_(OutFrom<Tensor>(outputs, scope)),
+        x_num_col_dims_(GetAttr<int>("x_num_col_dims", attrs)),
+        y_num_col_dims_(GetAttr<int>("y_num_col_dims", attrs)),
+        axis_(GetAttr<int>("axis", attrs)) {}
+
+  // Tensor accessors: inputs are handed out read-only, Out is writable.
+  const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputY() const { return input_y_; }
+  const Tensor *InputZ() const { return input_z_; }
+  Tensor *Out() const { return out_; }
+
+  // Attribute accessors.
+  const int &XNumColDims() const { return x_num_col_dims_; }
+  const int &YNumColDims() const { return y_num_col_dims_; }
+  const int &Axis() const { return axis_; }
+
+ private:
+  // Tensors looked up from the scope at construction time.
+  Tensor *input_x_;
+  Tensor *input_y_;
+  Tensor *input_z_;
+  Tensor *out_;
+  // Attribute values read from attrs at construction time.
+  int x_num_col_dims_;
+  int y_num_col_dims_;
+  int axis_;
+};
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 57ffd92c91f3f045745967bdfee52fc70317a328..20d6cfe7a780b36c9be4845519ee2730c049cfb2 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -45,10 +45,15 @@ target_link_libraries(test-multiclassnms-op paddle-mobile)
 # gen test
 ADD_EXECUTABLE(test-reshape-op  operators/test_reshape_op.cpp test_helper.h  test_include.h)
 target_link_libraries(test-reshape-op paddle-mobile)
+
 # gen test
 ADD_EXECUTABLE(test-relu-op  operators/test_relu_op.cpp test_helper.h  test_include.h)
 target_link_libraries(test-relu-op paddle-mobile)
 
+# gen test
+ADD_EXECUTABLE(test-fc-op  operators/test_fushion_fc_op.cpp test_helper.h  test_include.h)
+target_link_libraries(test-fc-op paddle-mobile)
+
 # gen test log
 ADD_EXECUTABLE(test-log common/test_log.cpp)
 target_link_libraries(test-log paddle-mobile)
diff --git a/test/framework/test_optimize.cpp b/test/framework/test_optimize.cpp
index 757e258a8e6285ec38a4534faefc82ff623137cf..c721c453739296685aa0075ca13db41a9072353e 100644
--- a/test/framework/test_optimize.cpp
+++ b/test/framework/test_optimize.cpp
@@ -24,7 +24,7 @@ int main() {
   //  program.originProgram->Description("origin");
   auto optimize_program = optimize.FushionOptimize(program.originProgram);
   if (optimize_program != nullptr) {
-    //    optimize_program->Description("optimize");
+    optimize_program->Description("optimize");
   } else {
     LOG(paddle_mobile::kLOG_ERROR) << "optimize_program is null";
   }
diff --git a/test/operators/test_fushion_fc_op.cpp b/test/operators/test_fushion_fc_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b52989b2e8b3f25a6994de7e630a6360ac8504d9
--- /dev/null
+++ b/test/operators/test_fushion_fc_op.cpp
@@ -0,0 +1,160 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+// Standalone test program for the fused FC operator (FushionFcOp).
+
+#include <framework/program/program-optimize/program_optimize.h>
+#include "../test_include.h"
+#include "operators/fusion_fc_op.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+// Test harness: wraps the program's matching "fc" op descriptions in
+// FushionFcOp instances and runs them on tensors fed through the scope.
+template <typename Dtype>
+class TestFcOp {
+ public:
+  explicit TestFcOp(const Program<Dtype> &p) : program_(p) {
+    // The test always runs against the fusion-optimized program.
+    use_optimize_ = true;
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    // Keep only the "fc" ops whose X input is "pool2d_13.tmp_0".
+    for (const auto &block_desc : to_predict_program_->Blocks()) {
+      for (const auto &op : block_desc->Ops()) {
+        if (op->Type() != "fc" || op->Input("X")[0] != "pool2d_13.tmp_0") {
+          continue;
+        }
+        DLOG << " fc attr size: " << op->GetAttrMap().size();
+        DLOG << " inputs size: " << op->GetInputs().size();
+        DLOG << " outputs size: " << op->GetOutputs().size();
+        DLOG << " Input X is : " << op->Input("X")[0];
+        DLOG << " Input Y is : " << op->Input("Y")[0];
+        DLOG << " Input Z is : " << op->Input("Z")[0];
+        DLOG << " Output Out is : " << op->Output("Out")[0];
+        auto testOp = std::make_shared<operators::FushionFcOp<Dtype, float>>(
+            op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
+            program_.scope);
+        ops_of_block_[*block_desc].push_back(testOp);
+      }
+    }
+  }
+
+  // Feeds t1/t2/t3 as X/Y/Z through the scope, runs block 0 and returns Out.
+  std::shared_ptr<Tensor> predict(const Tensor &t1, const Tensor &t2,
+                                  const Tensor &t3) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x_feed_value = scope->Var("pool2d_13.tmp_0");
+    auto tensor_x = x_feed_value->GetMutable<Tensor>();
+    tensor_x->ShareDataWith(t1);
+
+    Variable *y_feed_value = scope->Var("loss3_classifier-loc_weights");
+    auto tensor_y = y_feed_value->GetMutable<Tensor>();
+    tensor_y->ShareDataWith(t2);
+
+    Variable *z_feed_value = scope->Var("loss3_classifier-loc_biases");
+    auto tensor_z = z_feed_value->GetMutable<Tensor>();
+    tensor_z->ShareDataWith(t3);
+
+    // Allocate the output the op writes to as a 3 x 10 float buffer.
+    Variable *con_output = scope->Var("loss3_classifier-loc.tmp_1");
+    auto *output_tensor = con_output->GetMutable<Tensor>();
+    output_tensor->mutable_data<float>({3, 10});
+
+    // output_tensor comes from the scope's Variable, so return a non-owning
+    // alias; an owning shared_ptr would presumably delete it a second time.
+    std::shared_ptr<Tensor> out_tensor(output_tensor, [](Tensor *) {});
+
+    predict(t1, t2, t3, 0);
+    return out_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  // Operators to run, grouped by the block they were found in.
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  // Runs, in order, the operators collected for block `block_id`.
+  // The tensor arguments are not used in this overload.
+  void predict(const Tensor &t1, const Tensor &t2, const Tensor &t3,
+               int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (const auto &op : ops_of_block_[*to_predict_block]) {
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestFcOp<CPU>;
+}  // namespace framework
+}  // namespace paddle_mobile
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run Fc Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  //    "../../../test/models/googlenet"
+  auto program = loader.Load("../models/googlenet");
+  paddle_mobile::framework::ProgramOptimize optimize;
+  //  program.originProgram->Description("origin");
+  auto optimize_program = optimize.FushionOptimize(program.originProgram);
+
+  // TestFcOp reads program.optimizeProgram, so bail out when fusion failed
+  // rather than dereferencing a null program description later on.
+  if (optimize_program == nullptr) {
+    LOG(paddle_mobile::kLOG_ERROR) << "optimize_program is null";
+    return 1;
+  }
+  program.optimizeProgram = optimize_program;
+  optimize_program->Description("optimize");
+
+  /// input x: dims (3, 64, 1, 1)
+  paddle_mobile::framework::Tensor inputx;
+  SetupTensor<float>(&inputx, {3, 64, 1, 1}, static_cast<float>(1),
+                     static_cast<float>(1));
+
+  /// input y (fed as the weights variable): dims (64, 10)
+  paddle_mobile::framework::Tensor inputy;
+  SetupTensor<float>(&inputy, {64, 10}, static_cast<float>(1.5),
+                     static_cast<float>(1.5));
+
+  /// input z (fed as the biases variable): dims (10)
+  paddle_mobile::framework::Tensor inputz;
+  SetupTensor<float>(&inputz, {10}, static_cast<float>(0),
+                     static_cast<float>(1));
+
+  paddle_mobile::framework::TestFcOp<paddle_mobile::CPU> testFcOp(program);
+
+  auto output = testFcOp.predict(inputx, inputy, inputz);
+  auto *output_ptr = output->data<float>();
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << "value of output: " << output_ptr[j];
+  }
+
+  DLOG << "1 (3,64) * 2 (64,10) = 96(3,10)";
+  DLOG << "output : 96(3,10) + bias(10)";
+
+  return 0;
+}