Merge branch 'develop' into Batch_Size

f0a6ddfd · jackzhang235 · dda23220 · cce54cb6 · f0a6ddfd · f0a6ddfd
11 changed file
--- a/.github/workflows/github-CI.yml
+++ b/.github/workflows/github-CI.yml
@@ -41,6 +41,12 @@ jobs:
      run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_softmax_converter_mlu
    - name: test_transpose_converter_mlu
      run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_transpose_converter_mlu
+    - name: test_slice_converter_mlu
+      run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_slice_converter_mlu
+    - name: test_argmax_converter_mlu
+      run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_argmax_converter_mlu
+    - name: test_split_converter_mlu
+      run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_split_converter_mlu
    - name: test_classification
      run: |
        cd ..

--- a/lite/core/mir/elimination/identity_scale_eliminate_pass.cc
+++ b/lite/core/mir/elimination/identity_scale_eliminate_pass.cc
@@ -26,7 +26,9 @@ class Eliminator : public FuseBase {
 public:
  void BuildPattern() override {
    // the previous op's output need updat
-    auto* pre_op = OpNode("preop")->assert_is_not_op_type("conditional_block");
+    auto* pre_op = OpNode("preop")
+                       ->assert_is_not_op_type("conditional_block")
+                       ->assert_is_not_op_type("scale");
    // TODO(Superjomn) check has only one output
    auto* x = VarNode("x")->assert_is_op_input("scale", "X");
    auto* scale_op = OpNode("scale", "scale")

--- a/lite/kernels/mlu/bridges/CMakeLists.txt
+++ b/lite/kernels/mlu/bridges/CMakeLists.txt
@@ -21,7 +21,9 @@ lite_cc_library(subgraph_bridge_concat_op_mlu SRCS concat_op.cc DEPS ${subgraph_
 lite_cc_library(subgraph_bridge_transpose_op_mlu SRCS transpose_op.cc DEPS ${subgraph_bridge_deps_mlu})
 lite_cc_library(subgraph_bridge_dropout_op_mlu SRCS dropout_op.cc DEPS ${subgraph_bridge_deps_mlu})
 lite_cc_library(subgraph_bridge_slice_op_mlu SRCS slice_op.cc DEPS ${subgraph_bridge_deps_mlu})
+lite_cc_library(subgraph_bridge_split_op_mlu SRCS split_op.cc DEPS ${subgraph_bridge_deps_mlu})
 lite_cc_library(subgraph_bridge_argmax_op_mlu SRCS argmax_op.cc DEPS ${subgraph_bridge_deps_mlu})
+lite_cc_library(subgraph_bridge_squeeze_op_mlu SRCS squeeze_op.cc DEPS ${subgraph_bridge_deps_mlu})
 set(mlu_subgraph_bridges
        subgraph_bridge_registry
        subgraph_bridge_utility_mlu
@@ -39,7 +41,9 @@ set(mlu_subgraph_bridges
        subgraph_bridge_concat_op_mlu
        subgraph_bridge_dropout_op_mlu
        subgraph_bridge_slice_op_mlu
+        subgraph_bridge_split_op_mlu
        subgraph_bridge_argmax_op_mlu
+        subgraph_bridge_squeeze_op_mlu
        CACHE INTERNAL "mlu_subgraph_bridges")
@@ -62,7 +66,9 @@ lite_cc_test(test_concat_converter_mlu SRCS concat_op_test.cc DEPS scope optimiz
 lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_dropout_converter_mlu SRCS dropout_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_slice_converter_mlu SRCS slice_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
+lite_cc_test(test_split_converter_mlu SRCS split_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_argmax_converter_mlu SRCS argmax_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
+lite_cc_test(test_squeeze_converter_mlu SRCS squeeze_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 if (LITE_BUILD_EXTRA)
  lite_cc_test(test_lrn_converter_mlu SRCS lrn_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 endif()

--- a/lite/kernels/mlu/bridges/act_op.cc
+++ b/lite/kernels/mlu/bridges/act_op.cc
@@ -73,6 +73,9 @@ REGISTER_SUBGRAPH_BRIDGE(sigmoid,
                         kMLU,
                         paddle::lite::subgraph::mlu::ActConverter);
 REGISTER_SUBGRAPH_BRIDGE(relu, kMLU, paddle::lite::subgraph::mlu::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(relu6,
+                         kMLU,
+                         paddle::lite::subgraph::mlu::ActConverter);
 REGISTER_SUBGRAPH_BRIDGE(tanh, kMLU, paddle::lite::subgraph::mlu::ActConverter);
 REGISTER_SUBGRAPH_BRIDGE(leaky_relu,
                         kMLU,

--- a/lite/kernels/mlu/bridges/act_op_test.cc
+++ b/lite/kernels/mlu/bridges/act_op_test.cc
@@ -13,7 +13,9 @@
 // limitations under the License.
 #include <gtest/gtest.h>
 #include <random>
 #include "lite/core/op_lite.h"
 #include "lite/core/op_registry.h"
 #include "lite/kernels/mlu/bridges/test_helper.h"
@@ -134,7 +136,8 @@ void test_act(std::vector<int64_t> x_shape, std::string op_type) {
 TEST(MLUBridges, activation) {
  std::vector<std::vector<int64_t>> shapes{{1}, {2, 3}, {1, 2, 3, 4}};
-  std::vector<std::string> types{"sigmoid", "relu", "tanh", "leaky_relu"};
+  std::vector<std::string> types{
+      "sigmoid", "relu", "relu6", "tanh", "leaky_relu"};
  for (auto x_shape : shapes) {
    for (auto op_type : types) {
      test_act(x_shape, op_type);
@@ -149,5 +152,6 @@ TEST(MLUBridges, activation) {
 USE_SUBGRAPH_BRIDGE(sigmoid, kMLU)
 USE_SUBGRAPH_BRIDGE(relu, kMLU)
+USE_SUBGRAPH_BRIDGE(relu6, kMLU)
 USE_SUBGRAPH_BRIDGE(tanh, kMLU)
 USE_SUBGRAPH_BRIDGE(leaky_relu, kMLU)
--- a/lite/kernels/mlu/bridges/paddle_use_bridges.h
+++ b/lite/kernels/mlu/bridges/paddle_use_bridges.h
@@ -15,6 +15,7 @@
 #pragma once
 USE_SUBGRAPH_BRIDGE(relu, kMLU);
+USE_SUBGRAPH_BRIDGE(relu6, kMLU);
 USE_SUBGRAPH_BRIDGE(conv2d, kMLU);
 USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kMLU);
 USE_SUBGRAPH_BRIDGE(elementwise_add, kMLU);
@@ -32,6 +33,10 @@ USE_SUBGRAPH_BRIDGE(sigmoid, kMLU);
 USE_SUBGRAPH_BRIDGE(elementwise_mul, kMLU);
 USE_SUBGRAPH_BRIDGE(dropout, kMLU);
 USE_SUBGRAPH_BRIDGE(argmax, kMLU);
+USE_SUBGRAPH_BRIDGE(split, kMLU);
+USE_SUBGRAPH_BRIDGE(slice, kMLU);
+USE_SUBGRAPH_BRIDGE(squeeze, kMLU);
+USE_SUBGRAPH_BRIDGE(squeeze2, kMLU);
 #ifdef LITE_BUILD_EXTRA
 USE_SUBGRAPH_BRIDGE(lrn, kMLU)
 #endif
--- a/lite/kernels/mlu/bridges/split_op.cc
+++ b/lite/kernels/mlu/bridges/split_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "lite/kernels/mlu/bridges/graph.h"
+#include "lite/kernels/mlu/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace mlu {
+// Converts a Paddle `split` op into a CNML NdSplit op and fuses it into the
+// MLU subgraph.
+//
+//   ctx    - the MLU `Graph` under construction; must not be null.
+//   op     - the `split` op being converted; must not be null.
+//   kernel - not used by this converter.
+//
+// Returns SUCCESS after the op has been fused. Invalid arguments are reported
+// through CHECK.
+int SplitConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto scope = op->scope();
+  VLOG(3) << "[MLU] Converting " + op_type + "...";
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<Tensor>();
+  auto x_dims = x->dims().Vectorize();
+  auto out_var_names = op_info->Output("Out");
+  auto param_axis = op_info->GetAttr<int>("axis");
+  auto num = op_info->GetAttr<int>("num");
+  auto sections = op_info->GetAttr<std::vector<int>>("sections");
+  // A positive `num` wins; otherwise the output count comes from `sections`.
+  const int output_num = num > 0 ? num : static_cast<int>(sections.size());
+  // Normalize a negative axis against the input rank.
+  const int rank = static_cast<int>(x_dims.size());
+  const int axis = (param_axis < 0) ? (param_axis + rank) : param_axis;
+  // The lookup table below has exactly four entries, so the axis must lie in
+  // [0, 4); the previous `axis <= 4` test let axis == 4 read past the table.
+  // NOTE(review): the table is only meaningful for 4-D inputs - confirm
+  // whether inputs of other ranks can reach this converter.
+  CHECK(axis >= 0 && axis < 4) << "Unsupported axis in mlu split";
+  const int nchw_to_nhwc_axis_map[4] = {0, 3, 1, 2};
+  const int nhwc_axis = nchw_to_nhwc_axis_map[axis];
+  // Register one graph node per output variable.
+  std::vector<cnmlTensor_t> output_tensor;
+  output_tensor.reserve(out_var_names.size());
+  for (const auto& out_name : out_var_names) {
+    auto out = scope->FindVar(out_name)->GetMutable<Tensor>();
+    auto out_dims = out->dims().Vectorize();
+    auto out_tensor = graph->AddNode(
+        out_name, out_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
+    output_tensor.push_back(out_tensor->mlu_tensor());
+  }
+  // CNML reads `output_num` entries from output_tensor.data(); make sure the
+  // attributes agree with the outputs that were actually registered.
+  CHECK(output_tensor.size() == static_cast<size_t>(output_num))
+      << "split attributes do not match the number of outputs";
+  CHECK(graph->HasNode(x_var_name));
+  auto input_tensor = graph->GetNode(x_var_name);
+  cnmlBaseOp_t split_op;
+  cnmlTensor_t inputs = input_tensor->mlu_tensor();
+  CNML_CALL(cnmlCreateNdSplitOp(
+      &split_op, nhwc_axis, &inputs, 1, output_tensor.data(), output_num));
+  graph->FuseOp(split_op);
+  CNML_CALL(cnmlDestroyBaseOp(&split_op));
+  return SUCCESS;
+}
+}  // namespace mlu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+REGISTER_SUBGRAPH_BRIDGE(split,
+                         kMLU,
+                         paddle::lite::subgraph::mlu::SplitConverter);
--- a/lite/kernels/mlu/bridges/split_op_test.cc
+++ b/lite/kernels/mlu/bridges/split_op_test.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "lite/operators/split_op.h"
+#include <gtest/gtest.h>
+#include "lite/core/op_lite.h"
+#include "lite/core/op_registry.h"
+#include "lite/kernels/mlu/bridges/test_helper.h"
+#include "lite/kernels/mlu/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace mlu {
+// Host-side reference implementation of `split`, used to validate the MLU
+// bridge. Reads input "X" and the attributes "num", "axis" and "sections"
+// from `op`, resizes every "Out" tensor found in the op's scope and copies
+// the matching slice of X along `axis` into it.
+template <typename dtype>
+void split_ref(const std::shared_ptr<operators::SplitOp> op) {
+  Scope* scope = op->scope();
+  const OpInfo* op_info = op->op_info();
+  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
+  const int num = op_info->GetAttr<int>("num");
+  int axis = op_info->GetAttr<int>("axis");
+  const std::vector<int> sections =
+      op_info->GetAttr<std::vector<int>>("sections");
+  std::vector<lite::Tensor*> output_vec;
+  const auto& out_names = op_info->Output("Out");
+  for (const auto& out_var : out_names) {
+    output_vec.push_back(scope->Var(out_var)->GetMutable<Tensor>());
+  }
+  auto in_dims = x->dims();
+  if (axis < 0) {
+    axis += static_cast<int>(in_dims.size());
+  }
+  // Work out the shape of every output: equal parts when `num` is positive,
+  // otherwise the explicit extents listed in `sections`.
+  const size_t outs_number = output_vec.size();
+  std::vector<lite::DDimLite> outs_dims;
+  outs_dims.reserve(outs_number);
+  if (num > 0) {
+    const int out_axis_dim = in_dims[axis] / num;
+    for (size_t i = 0; i < outs_number; ++i) {
+      auto dim = in_dims;
+      dim[axis] = out_axis_dim;
+      outs_dims.push_back(dim);
+    }
+  } else if (!sections.empty()) {
+    // Guard the sections[i] lookups below.
+    CHECK(sections.size() >= outs_number)
+        << "split_ref: fewer sections than outputs";
+    for (size_t i = 0; i < outs_number; ++i) {
+      auto dim = in_dims;
+      dim[axis] = sections[i];
+      outs_dims.push_back(dim);
+    }
+  }
+  // Without a shape for every output the copy loop below would index
+  // unresized tensors.
+  CHECK(outs_dims.size() == outs_number)
+      << "split_ref: needs num > 0 or non-empty sections";
+  for (size_t j = 0; j < outs_number; ++j) {
+    output_vec[j]->Resize(outs_dims[j]);
+  }
+  // sizes[i] is the product of dims[i..rank-1], i.e. the number of elements
+  // covered by one index step at axis i - 1.
+  auto suffix_sizes = [](const lite::DDimLite& dims) {
+    std::vector<int> sizes(dims.size());
+    sizes[dims.size() - 1] = dims[dims.size() - 1];
+    for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i) {
+      sizes[i] = sizes[i + 1] * dims[i];
+    }
+    return sizes;
+  };
+  const dtype* din = x->data<dtype>();
+  const std::vector<int> in_sizes = suffix_sizes(in_dims);
+  int input_offset = 0;
+  for (auto* out : output_vec) {
+    const std::vector<int> out_sizes = suffix_sizes(out->dims());
+    dtype* out_data = out->mutable_data<dtype>();
+    // `before` counts the index combinations ahead of `axis`; each of them
+    // owns one contiguous run of `out_after` elements in this output.
+    const int before = out_sizes[0] / out_sizes[axis];
+    const int in_after = in_sizes[axis];
+    const int out_after = out_sizes[axis];
+    for (int i = 0; i < before; ++i) {
+      std::memcpy(out_data + i * out_after,
+                  din + input_offset + i * in_after,
+                  sizeof(dtype) * out_after);
+    }
+    // The next output starts right after this one's run inside the input.
+    input_offset += out_after;
+  }
+}
+// Runs one `split` case through the MLU bridge and compares both outputs
+// against split_ref.
+//   bs, ic, ih, iw      - shape of the input tensor "x".
+//   axis, num, sections - stored in the op attributes of the same names.
+// The op is always built with exactly two outputs, "out_1" and "out_2".
+void test_split(int bs,
+                int ic,
+                int ih,
+                int iw,
+                int axis,
+                int num,
+                std::vector<int> sections) {
+  // prepare input&output variables
+  std::string x_var_name = "x";
+  std::string out_var_name_1 = "out_1";
+  std::string out_var_name_2 = "out_2";
+  std::string out_ref_var_name_1 = "out_ref_1";
+  std::string out_ref_var_name_2 = "out_ref_2";
+  Scope scope;
+  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
+  auto* out_1 = scope.Var(out_var_name_1)->GetMutable<Tensor>();
+  auto* out_2 = scope.Var(out_var_name_2)->GetMutable<Tensor>();
+  auto* out_ref_1 = scope.Var(out_ref_var_name_1)->GetMutable<Tensor>();
+  auto* out_ref_2 = scope.Var(out_ref_var_name_2)->GetMutable<Tensor>();
+  x->Resize({bs, ic, ih, iw});
+  // initialize input&output data
+  FillTensor<float>(x);
+  // initialize op desc
+  cpp::OpDesc opdesc;
+  opdesc.SetType("split");
+  opdesc.SetInput("X", {x_var_name});
+  opdesc.SetOutput("Out", {out_var_name_1, out_var_name_2});
+  opdesc.SetAttr("axis", axis);
+  opdesc.SetAttr("sections", sections);
+  opdesc.SetAttr("num", num);
+  auto op = CreateOp<operators::SplitOp>(opdesc, &scope);
+  // execute the reference implementation; it writes into out_1 / out_2, so
+  // the results are saved to the *_ref tensors before the MLU run
+  // overwrites them
+  split_ref<float>(op);
+  out_ref_1->CopyDataFrom(*out_1);
+  out_ref_2->CopyDataFrom(*out_2);
+  // rewrite x in place with the permuted layout that is fed to the MLU op
+  // NOTE(review): permutation {0, 2, 3, 1} presumably converts NCHW to NHWC;
+  // confirm against the transpose helper.
+  Tensor input;
+  input.Resize({bs, ic, ih, iw});
+  transpose<float*>(x->mutable_data<float>(),
+                    input.mutable_data<float>(),
+                    {static_cast<int>(bs),
+                     static_cast<int>(ic),
+                     static_cast<int>(ih),
+                     static_cast<int>(iw)},
+                    {0, 2, 3, 1});
+  x->CopyDataFrom(input);
+  LaunchOp(op, {x_var_name}, {out_var_name_1, out_var_name_2});
+  // compare results
+  auto* out_data_1 = out_1->mutable_data<float>();
+  auto* out_data_2 = out_2->mutable_data<float>();
+  auto* out_ref_data_1 = out_ref_1->mutable_data<float>();
+  auto* out_ref_data_2 = out_ref_2->mutable_data<float>();
+  Tensor output1, output2;
+  output1.Resize(out_1->dims());
+  output2.Resize(out_2->dims());
+  // permute the MLU outputs with {0, 3, 1, 2} before comparing
+  // NOTE(review): presumably the inverse (NHWC back to NCHW) of the input
+  // permutation above; confirm against the transpose helper.
+  transpose<float*>(out_data_1,
+                    output1.mutable_data<float>(),
+                    {static_cast<int>(out_1->dims()[0]),
+                     static_cast<int>(out_1->dims()[2]),
+                     static_cast<int>(out_1->dims()[3]),
+                     static_cast<int>(out_1->dims()[1])},
+                    {0, 3, 1, 2});
+  transpose<float*>(out_data_2,
+                    output2.mutable_data<float>(),
+                    {static_cast<int>(out_2->dims()[0]),
+                     static_cast<int>(out_2->dims()[2]),
+                     static_cast<int>(out_2->dims()[3]),
+                     static_cast<int>(out_2->dims()[1])},
+                    {0, 3, 1, 2});
+  // from here on the comparison reads the permuted copies
+  out_data_1 = output1.mutable_data<float>();
+  out_data_2 = output2.mutable_data<float>();
+  for (int i = 0; i < out_1->dims().production(); i++) {
+    VLOG(5) << i;
+    EXPECT_NEAR(out_data_1[i], out_ref_data_1[i], 5e-4);
+  }
+  for (int i = 0; i < out_2->dims().production(); i++) {
+    VLOG(5) << i;
+    EXPECT_NEAR(out_data_2[i], out_ref_data_2[i], 5e-4);
+  }
+}
+// Covers each of the four axes twice: once split evenly through `num` and
+// once through explicit `sections`.
+TEST(MLUBridges, split) {
+  struct SplitCase {
+    int bs;
+    int ic;
+    int ih;
+    int iw;
+    int axis;
+    int num;
+    std::vector<int> sections;
+  };
+  const std::vector<SplitCase> cases = {
+      {4, 2, 3, 1, 0, 2, {}},
+      {4, 2, 3, 1, 0, 0, {3, 1}},
+      {4, 6, 3, 1, 1, 2, {}},
+      {4, 6, 3, 1, 1, 0, {2, 4}},
+      {4, 2, 2, 1, 2, 2, {}},
+      {4, 2, 6, 1, 2, 0, {3, 3}},
+      {4, 2, 3, 4, 3, 2, {}},
+      {4, 2, 3, 6, 3, 0, {5, 1}},
+  };
+  for (const auto& c : cases) {
+    test_split(c.bs, c.ic, c.ih, c.iw, c.axis, c.num, c.sections);
+  }
+}
+}  // namespace mlu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+USE_SUBGRAPH_BRIDGE(split, kMLU);
--- a/lite/kernels/mlu/bridges/squeeze_op.cc
+++ b/lite/kernels/mlu/bridges/squeeze_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "lite/kernels/mlu/bridges/graph.h"
+#include "lite/kernels/mlu/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace mlu {
+// Converts a Paddle `squeeze` / `squeeze2` op into CNML reshape op(s) and
+// fuses them into the MLU subgraph.
+//
+//   ctx    - the MLU `Graph` under construction; must not be null.
+//   op     - the op being converted; must not be null.
+//   kernel - not used by this converter.
+//
+// "Out" is always produced by reshaping "X" to the output's dims. For
+// `squeeze2` a second reshape of "X" additionally produces "XShape".
+// Returns SUCCESS after the op(s) have been fused.
+int SqueezeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto scope = op->scope();
+  VLOG(3) << "[MLU] Converting " + op_type + "...";
+  auto fp_type = graph->FPType();
+  auto x_var_name = op_info->Input("X").front();
+  auto out_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
+  auto output_dims = output->dims().Vectorize();
+  auto output_tensor = graph->AddNode(
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, fp_type);
+  CHECK(graph->HasNode(x_var_name));
+  auto input_tensor = graph->GetNode(x_var_name);
+  // Builds and fuses one reshape op from the input node to `dst`. The target
+  // shape is `nchw_dims` passed through DimNCHW2NHWC and narrowed to int.
+  // NOTE(review): this reshapes the data as laid out on the device; confirm
+  // the element order matches an NCHW squeeze when the input is 4-D.
+  auto fuse_reshape = [&](const std::vector<DDim::value_type>& nchw_dims,
+                          cnmlTensor_t dst) {
+    auto nhwc_dims = DimNCHW2NHWC(nchw_dims);
+    std::vector<int> target_dims(nhwc_dims.size());
+    std::transform(nhwc_dims.cbegin(),
+                   nhwc_dims.cend(),
+                   target_dims.begin(),
+                   [](DDim::value_type d) { return static_cast<int>(d); });
+    cnmlReshapeOpParam_t param;
+    cnmlBaseOp_t reshape_op;
+    CNML_CALL(cnmlCreateNdReshapeOpParam(
+        &param, target_dims.data(), target_dims.size()));
+    CNML_CALL(cnmlCreateReshapeOp(
+        &reshape_op, param, input_tensor->mlu_tensor(), dst));
+    CNML_CALL(cnmlDestroyReshapeOpParam(&param));
+    graph->FuseOp(reshape_op);
+    CNML_CALL(cnmlDestroyBaseOp(&reshape_op));
+  };
+  fuse_reshape(output_dims, output_tensor->mlu_tensor());
+  if (op_type == "squeeze2") {
+    // squeeze2 carries an extra "XShape" output, produced the same way.
+    auto xshape_var_name = op_info->Output("XShape").front();
+    auto xshape = scope->FindVar(xshape_var_name)->GetMutable<Tensor>();
+    auto xshape_dims = xshape->dims().Vectorize();
+    auto xshape_tensor = graph->AddNode(
+        xshape_var_name, xshape_dims, CNML_TENSOR, CNML_NCHW, fp_type);
+    fuse_reshape(xshape_dims, xshape_tensor->mlu_tensor());
+  }
+  return SUCCESS;
+}
+}  // namespace mlu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+REGISTER_SUBGRAPH_BRIDGE(squeeze,
+                         kMLU,
+                         paddle::lite::subgraph::mlu::SqueezeConverter);
+REGISTER_SUBGRAPH_BRIDGE(squeeze2,
+                         kMLU,
+                         paddle::lite::subgraph::mlu::SqueezeConverter);
--- a/lite/kernels/mlu/bridges/squeeze_op_test.cc
+++ b/lite/kernels/mlu/bridges/squeeze_op_test.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "lite/operators/squeeze_op.h"
+#include <gtest/gtest.h>
+#include <memory>
+#include <utility>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/kernels/mlu/bridges/test_helper.h"
+#include "lite/kernels/npu/bridges/registry.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace mlu {
+// squeeze
+// Squeezes a {1, 3, 1, 5} tensor with axes {0, -2} into {3, 5} on the MLU and
+// expects the output elements to equal the input elements, in order.
+TEST(MLUBridges, squeeze) {
+  const std::string input_name("x");
+  const std::string output_name("out");
+  const std::string reference_name("ref");
+  Scope scope;
+  auto* input = scope.Var(input_name)->GetMutable<Tensor>();
+  auto* result = scope.Var(output_name)->GetMutable<Tensor>();
+  auto* expected = scope.Var(reference_name)->GetMutable<Tensor>();
+  // Shapes: the reference keeps the input shape, the result is squeezed.
+  const std::vector<int64_t> input_shape({1, 3, 1, 5});
+  const std::vector<int64_t> squeezed_shape({3, 5});
+  input->Resize(input_shape);
+  expected->Resize(input_shape);
+  result->Resize(squeezed_shape);
+  FillTensor<float>(input, 0, 10);
+  expected->CopyDataFrom(*input);
+  // Describe the op.
+  cpp::OpDesc desc;
+  desc.SetType("squeeze");
+  desc.SetInput("X", {input_name});
+  desc.SetOutput("Out", {output_name});
+  std::vector<int> squeeze_axes{0, -2};
+  desc.SetAttr("axes", squeeze_axes);
+  // Create the op, convert it to an MLU model and run it on the MLU.
+  auto squeeze_op = CreateOp<operators::SqueezeOp>(desc, &scope);
+  LaunchOp(squeeze_op, {input_name}, {output_name});
+  auto expected_data = expected->data<float>();
+  auto result_data = result->data<float>();
+  for (int idx = 0; idx < result->numel(); ++idx) {
+    EXPECT_NEAR(result_data[idx], expected_data[idx], 1e-5);
+  }
+}
+// squeeze2
+// Same scenario as the squeeze test, for op type "squeeze2": both "Out"
+// ({3, 5}) and "XShape" ({1, 3, 1, 5}) are expected to hold the input
+// elements, in order.
+TEST(MLUBridges, squeeze2) {
+  const std::string input_name("x");
+  const std::string output_name("out");
+  const std::string shape_name("xshape");
+  const std::string reference_name("ref");
+  Scope scope;
+  auto* input = scope.Var(input_name)->GetMutable<Tensor>();
+  auto* result = scope.Var(output_name)->GetMutable<Tensor>();
+  auto* shape_out = scope.Var(shape_name)->GetMutable<Tensor>();
+  auto* expected = scope.Var(reference_name)->GetMutable<Tensor>();
+  // Shapes: the reference and XShape keep the input shape.
+  const std::vector<int64_t> input_shape({1, 3, 1, 5});
+  const std::vector<int64_t> squeezed_shape({3, 5});
+  const std::vector<int64_t> shape_out_shape({1, 3, 1, 5});
+  input->Resize(input_shape);
+  expected->Resize(input_shape);
+  result->Resize(squeezed_shape);
+  shape_out->Resize(shape_out_shape);
+  FillTensor<float>(input, 0, 10);
+  expected->CopyDataFrom(*input);
+  // Describe the op.
+  cpp::OpDesc desc;
+  desc.SetType("squeeze2");
+  desc.SetInput("X", {input_name});
+  desc.SetOutput("Out", {output_name});
+  desc.SetOutput("XShape", {shape_name});
+  std::vector<int> squeeze_axes({0, -2});
+  desc.SetAttr("axes", squeeze_axes);
+  // Create the op, convert it to an MLU model and run it on the MLU.
+  auto squeeze_op = CreateOp<operators::SqueezeOp>(desc, &scope);
+  LaunchOp(squeeze_op, {input_name}, {output_name, shape_name});
+  auto expected_data = expected->mutable_data<float>();
+  auto result_data = result->mutable_data<float>();
+  auto shape_out_data = shape_out->mutable_data<float>();
+  for (int idx = 0; idx < result->numel(); ++idx) {
+    EXPECT_NEAR(result_data[idx], expected_data[idx], 1e-5);
+    EXPECT_NEAR(shape_out_data[idx], expected_data[idx], 1e-5);
+  }
+}
+}  // namespace mlu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+USE_SUBGRAPH_BRIDGE(squeeze, kMLU);
+USE_SUBGRAPH_BRIDGE(squeeze2, kMLU);
--- a/lite/kernels/mlu/bridges/utility.h
+++ b/lite/kernels/mlu/bridges/utility.h
@@ -103,14 +103,44 @@ inline const ::paddle::lite::DDimLite DimNCHW2NHWC(
      std::vector<int64_t>({dim[0], dim[2], dim[3], dim[1]}));
 }
+// Reorders a channel-last shape into channel-first order: the last entry is
+// moved to position 1. Shapes of rank 1 and 2 are returned unchanged; ranks
+// above 5 (and rank 0) are rejected through CHECK.
-inline const std::vector<int64_t> DimNHWC2NCHW(
+inline const std::vector<DDimLite::value_type> DimNHWC2NCHW(
-    const std::vector<int64_t>& dim) {
+    const std::vector<DDimLite::value_type>& dim) {
-  return std::vector<int64_t>({dim[0], dim[3], dim[1], dim[2]});
+  switch (dim.size()) {
+    case 1:
+    case 2:
+      return dim;
+    case 3:
+      return std::vector<DDimLite::value_type>({dim[0], dim[2], dim[1]});
+    case 4:
+      return std::vector<DDimLite::value_type>(
+          {dim[0], dim[3], dim[1], dim[2]});
+    case 5:
+      return std::vector<DDimLite::value_type>(
+          {dim[0], dim[4], dim[1], dim[2], dim[3]});
+    default:
+      CHECK(0) << "unsupport dimension";
+  }
+  // CHECK(0) is expected to abort, but every path of a non-void function must
+  // still return a value; flowing off the end is undefined behaviour.
+  return std::vector<DDimLite::value_type>();
 }
+// Reorders a channel-first shape into channel-last order: the entry at
+// position 1 is moved to the end. Shapes of rank 1 and 2 are returned
+// unchanged; ranks above 5 (and rank 0) are rejected through CHECK.
-inline const std::vector<int64_t> DimNCHW2NHWC(
+inline const std::vector<DDimLite::value_type> DimNCHW2NHWC(
-    const std::vector<int64_t>& dim) {
+    const std::vector<DDimLite::value_type>& dim) {
-  return std::vector<int64_t>({dim[0], dim[2], dim[3], dim[1]});
+  switch (dim.size()) {
+    case 1:
+    case 2:
+      return dim;
+    case 3:
+      return std::vector<DDimLite::value_type>({dim[0], dim[2], dim[1]});
+    case 4:
+      return std::vector<DDimLite::value_type>(
+          {dim[0], dim[2], dim[3], dim[1]});
+    case 5:
+      return std::vector<DDimLite::value_type>(
+          {dim[0], dim[2], dim[3], dim[4], dim[1]});
+    default:
+      CHECK(0) << "unsupport dimension";
+  }
+  // CHECK(0) is expected to abort, but every path of a non-void function must
+  // still return a value; flowing off the end is undefined behaviour.
+  return std::vector<DDimLite::value_type>();
 }
 template <paddle::lite_api::PrecisionType>