diff --git a/lite/kernels/xpu/bridges/CMakeLists.txt b/lite/kernels/xpu/bridges/CMakeLists.txt
index ebddd36451f6d78ce52115c6a6ef9e47e044fd00..a1f7b67be3b0b1798ea50daa6638873500786912 100644
--- a/lite/kernels/xpu/bridges/CMakeLists.txt
+++ b/lite/kernels/xpu/bridges/CMakeLists.txt
@@ -7,6 +7,7 @@ lite_cc_library(xpu_bridge_conv_op SRCS conv_op.cc DEPS ${xpu_bridge_deps})
 lite_cc_library(xpu_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS ${xpu_bridge_deps})
 lite_cc_library(xpu_bridge_pool_op SRCS pool_op.cc DEPS ${xpu_bridge_deps})
 lite_cc_library(xpu_bridge_softmax_op SRCS softmax_op.cc DEPS ${xpu_bridge_deps})
+lite_cc_library(xpu_bridge_mul_op SRCS mul_op.cc DEPS ${xpu_bridge_deps})
 
 set(xpu_bridges
     xpu_bridge_registry
@@ -15,6 +16,7 @@ set(xpu_bridges
     xpu_bridge_elementwise_ops
     xpu_bridge_pool_op
     xpu_bridge_softmax_op
+    xpu_bridge_mul_op
     CACHE INTERNAL "xpu_bridges")
 
 set(xpu_bridge_test_deps ${xpu_bridges} ${xpu_kernels} ${ops})
@@ -24,3 +26,4 @@ lite_cc_test(test_xpu_bridge_conv_op SRCS conv_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
 lite_cc_test(test_xpu_bridge_elementwise_ops SRCS elementwise_ops_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
 lite_cc_test(test_xpu_bridge_pool_op SRCS pool_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
 lite_cc_test(test_xpu_bridge_softmax_op SRCS softmax_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
+lite_cc_test(test_xpu_bridge_mul_op SRCS mul_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
diff --git a/lite/kernels/xpu/bridges/mul_op.cc b/lite/kernels/xpu/bridges/mul_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..edf44f78bbfb54cf4316d3b9d7d9be2a121669d7
--- /dev/null
+++ b/lite/kernels/xpu/bridges/mul_op.cc
@@ -0,0 +1,97 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/backends/xpu/builder.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+
+node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
+                           graph_ctx_type* graph_ctx,
+                           const node_map_type& input_nodes) {
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_type = lite::xpu::UniqueName(op_type);
+  LOG(INFO) << "[XPU] Converting " + op_type + "...";
+
+  // check context
+  CHECK(graph_ctx != nullptr);
+  CHECK(graph_ctx->builder != nullptr);
+  CHECK(graph_ctx->params != nullptr);
+
+  // get input, and attributes
+  auto x_var_name = op_info->Input("X").front();
+  auto y_var_name = op_info->Input("Y").front();
+  auto y_tensor = scope->FindMutableTensor(y_var_name);
+  auto y_dims = y_tensor->dims();
+  CHECK_EQ(y_dims.size(), 2) << "xpu now only support y_dims.size() == 2";
+
+  auto x_num_col_dims = op_info->GetAttr<int>("x_num_col_dims");
+  CHECK_EQ(x_num_col_dims, 1) << "xpu now only support x_num_col_dims == 1";
+  auto y_num_col_dims = op_info->GetAttr<int>("y_num_col_dims");
+  CHECK_EQ(y_num_col_dims, 1) << "xpu now only support y_num_col_dims == 1";
+
+  // create x node
+  std::shared_ptr<xtcl::xExpr> x_node = nullptr;
+  x_node = std::make_shared<xtcl::xExpr>(
+      graph_ctx->builder->CreateBatchFlatten(*input_nodes.at(x_var_name)));
+  graph_ctx->builder->SetLayer(unique_op_type + "/X");
+
+  // transpose y, since CreateDense expects the weight in (n, k) layout
+  DDimLite y_dims_t(std::vector<int64_t>{1, 1});
+  y_dims_t[0] = y_dims[1];
+  y_dims_t[1] = y_dims[0];
+  auto y_var_name_t = unique_op_type + "/Y";
+  Tensor* y_tensor_t = new Tensor();
+  y_tensor_t->Resize(y_dims_t);
+  auto y_data_t = y_tensor_t->mutable_data<float>();
+  auto y_data = y_tensor->mutable_data<float>();
+  for (int i = 0; i < y_dims_t[0]; i++) {
+    for (int j = 0; j < y_dims_t[1]; j++) {
+      y_data_t[i * y_dims_t[1] + j] = y_data[j * y_dims_t[0] + i];
+    }
+  }
+
+  // create y node
+  std::shared_ptr<xtcl::xExpr> y_const_node = nullptr;
+  y_const_node = std::make_shared<xtcl::xExpr>(graph_ctx->builder->CreateTensor(
+      y_var_name_t, lite::xpu::CvtShape(y_dims_t), ::xtcl::Float(32)));
+  auto y_const_tensor = lite::xpu::CvtTensor(y_tensor_t);
+  graph_ctx->params->emplace(std::make_pair(y_var_name_t, *y_const_tensor));
+  delete y_tensor_t;
+
+  // create mul node and set params from op
+  std::shared_ptr<xtcl::xExpr> mul_node = nullptr;
+  mul_node = std::make_shared<xtcl::xExpr>(graph_ctx->builder->CreateDense(
+      *x_node, *y_const_node, static_cast<int>(y_dims[1])));
+  graph_ctx->builder->SetLayer(unique_op_type);
+
+  // output converted nodes
+  node_map_type output_nodes;
+  output_nodes[op_info->Output("Out").front()] = mul_node;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_XPU_BRIDGE(mul, paddle::lite::kernels::xpu::bridges::MulConverter);
diff --git a/lite/kernels/xpu/bridges/mul_op_test.cc b/lite/kernels/xpu/bridges/mul_op_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cd439b68cb7286a919a8fce97371443f53ed40db
--- /dev/null
+++ b/lite/kernels/xpu/bridges/mul_op_test.cc
@@ -0,0 +1,113 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/mul_op.h"
+#include <gtest/gtest.h>
+#include "lite/core/op_registry.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+#include "lite/kernels/xpu/bridges/test_helper.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+
+// naive CPU matmul used as the reference result for the XPU bridge
+void mul_ref(const std::shared_ptr<operators::MulOpLite> op) {
+  Scope* scope = op->scope();
+  const OpInfo* op_info = op->op_info();
+  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
+  auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable<Tensor>();
+  auto out =
+      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
+  int32_t x_num_col_dims = op_info->GetAttr<int32_t>("x_num_col_dims");
+  int32_t y_num_col_dims = op_info->GetAttr<int32_t>("y_num_col_dims");
+  auto x_data = x->mutable_data<float>();
+  auto y_data = y->mutable_data<float>();
+  auto out_data = out->mutable_data<float>();
+  auto x_mat_dims = x->dims().Flatten2D(x_num_col_dims);
+  auto y_mat_dims = y->dims().Flatten2D(y_num_col_dims);
+  CHECK_EQ(x_mat_dims[1], y_mat_dims[0]);
+  const int M = x_mat_dims[0];
+  const int K = x_mat_dims[1];
+  const int N = y_mat_dims[1];
+  for (int m = 0; m < M; ++m) {
+    for (int n = 0; n < N; ++n) {
+      out_data[m * N + n] = 0;
+      for (int k = 0; k < K; ++k) {
+        out_data[m * N + n] += x_data[m * K + k] * y_data[k * N + n];
+      }
+    }
+  }
+}
+
+void test_mul(const std::vector<int64_t>& x_shape,
+              const std::vector<int64_t>& y_shape,
+              int x_num_col_dims,
+              int y_num_col_dims) {
+  Scope scope;
+  std::string x_var_name("X");
+  std::string y_var_name("Y");
+  std::string out_var_name("Out");
+  std::string out_ref_var_name("out_ref");
+  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
+  auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
+  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
+  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
+  x->Resize(x_shape);
+  y->Resize(y_shape);
+
+  FillTensor<float>(x);
+  FillTensor<float>(y);
+
+  // create mul op
+  cpp::OpDesc mul_op_desc;
+  mul_op_desc.SetType("mul");
+  mul_op_desc.SetInput("X", {x_var_name});
+  mul_op_desc.SetInput("Y", {y_var_name});
+  mul_op_desc.SetOutput("Out", {out_var_name});
+  mul_op_desc.SetAttr("x_num_col_dims", static_cast<int>(x_num_col_dims));
+  mul_op_desc.SetAttr("y_num_col_dims", static_cast<int>(y_num_col_dims));
+
+  auto mul_op = CreateOp<operators::MulOpLite>(mul_op_desc, &scope);
+  LauchOp(mul_op, {x_var_name}, {out_var_name});
+  out_ref->CopyDataFrom(*out);
+
+  mul_ref(mul_op);
+
+  // compare results
+  auto* out_data = out->mutable_data<float>();
+  auto* out_ref_data = out_ref->mutable_data<float>();
+  for (int i = 0; i < out->dims().production(); i++) {
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
+  }
+}
+
+TEST(XPUBridges, mul) {
+  test_mul({1, 2, 3, 4}, {24, 2}, 1, 1);
+  test_mul({2, 2, 3, 4}, {24, 2}, 1, 1);
+  test_mul({2, 7}, {7, 3}, 1, 1);
+  // test_mul({1, 8, 8, 1}, {1, 8, 2, 2}, 2, 2);
+  // test_mul({1, 5, 5, 1}, {1, 5, 7, 7}, 2, 2);
+  // test_mul({1, 4, 1, 1}, {4, 8}, 1, 1);
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+USE_LITE_OP(mul);
+USE_XPU_BRIDGE(mul);
diff --git a/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h b/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h
index ee48fee626b9459bb24780e9241dab3071307774..27e936eaaa125f26b0bdab43f5c38d60769cfd88 100644
--- a/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h
+++ b/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h
@@ -19,3 +19,7 @@
 USE_XPU_BRIDGE(relu);
 USE_XPU_BRIDGE(conv2d);
 USE_XPU_BRIDGE(depthwise_conv2d);
+USE_XPU_BRIDGE(elementwise_add);
+USE_XPU_BRIDGE(pool2d);
+USE_XPU_BRIDGE(softmax);
+USE_XPU_BRIDGE(mul);
diff --git a/lite/kernels/xpu/bridges/pool_op_test.cc b/lite/kernels/xpu/bridges/pool_op_test.cc
index 512d59feb1340bcaa485d9290886cf5d58a878cf..ed5f922d59b5ca5e387076c9a533c4b4c251cc87 100644
--- a/lite/kernels/xpu/bridges/pool_op_test.cc
+++ b/lite/kernels/xpu/bridges/pool_op_test.cc
@@ -181,7 +181,7 @@ void test_pool(int bs,
   }
 }
 
-TEST(NPUBridges, pool) {
+TEST(XPUBridges, pool) {
   for (auto pooling_type : {"max", "avg"}) {
     for (auto bs : {1, 3}) {
       for (auto ic : {2}) {
diff --git a/lite/kernels/xpu/bridges/softmax_op_test.cc b/lite/kernels/xpu/bridges/softmax_op_test.cc
index ee9a44acd5b8fec2e3df4d7bc4034808fc2b0b45..2cd12cbf4e8dc108ac43fec55a568ecec72a51ab 100644
--- a/lite/kernels/xpu/bridges/softmax_op_test.cc
+++ b/lite/kernels/xpu/bridges/softmax_op_test.cc
@@ -110,7 +110,7 @@ void test_softmax(int bs, int ic, int ih, int iw, int axis) {
   }
 }
 
-TEST(NPUBridges, softmax) {
+TEST(XPUBridges, softmax) {
   for (auto bs : {2, 3}) {
     for (auto ic : {4}) {
       for (auto ih : {5}) {