Commit 99b7f238 authored by --get, committed by MaxwellDing

(new feat): add flatten & reshape

(ref): insert transpose ops before and after flatten/reshape
Parent 6c405fca
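The converters in this commit handle the layout mismatch between the MLU graph (tensors kept in NHWC) and the NCHW shapes that flatten/reshape are defined on: the input is transposed to NCHW, reshaped, and transposed back to NHWC. A minimal host-side sketch of that pattern, in plain C++ with no CNML calls and an illustrative Transpose4D helper, not the device implementation:

#include <cstdint>
#include <vector>

// Permute a contiguous 4-D buffer: axis = {0, 3, 1, 2} gives NHWC -> NCHW,
// axis = {0, 2, 3, 1} gives NCHW -> NHWC.
std::vector<float> Transpose4D(const std::vector<float>& src,
                               const std::vector<int64_t>& dims,
                               const std::vector<int>& axis) {
  std::vector<int64_t> out_dims(4);
  for (int i = 0; i < 4; ++i) out_dims[i] = dims[axis[i]];
  // strides of the source buffer, in source order
  std::vector<int64_t> stride = {
      dims[1] * dims[2] * dims[3], dims[2] * dims[3], dims[3], 1};
  std::vector<float> dst(src.size());
  int64_t idx = 0;
  for (int64_t a = 0; a < out_dims[0]; ++a)
    for (int64_t b = 0; b < out_dims[1]; ++b)
      for (int64_t c = 0; c < out_dims[2]; ++c)
        for (int64_t d = 0; d < out_dims[3]; ++d) {
          const int64_t coord[4] = {a, b, c, d};  // coordinates in dst order
          int64_t offset = 0;                     // map back into src order
          for (int i = 0; i < 4; ++i) offset += coord[i] * stride[axis[i]];
          dst[idx++] = src[offset];
        }
  return dst;
}

// Usage: feed NHWC data, reshape on NCHW, hand NHWC data back.
//   auto nchw = Transpose4D(nhwc_in, in_dims, {0, 3, 1, 2});
//   // reshape/flatten is a pure metadata change on the contiguous buffer
//   auto nhwc_out = Transpose4D(nchw, out_dims_nchw, {0, 2, 3, 1});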
@@ -26,6 +26,8 @@ lite_cc_library(subgraph_bridge_cast_op_mlu SRCS cast_op.cc DEPS ${subgraph_brid
lite_cc_library(subgraph_bridge_layout_op_mlu SRCS layout_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_argmax_op_mlu SRCS argmax_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_squeeze_op_mlu SRCS squeeze_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_reshape_op_mlu SRCS reshape_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_flatten_op_mlu SRCS flatten_op.cc DEPS ${subgraph_bridge_deps_mlu})
set(mlu_subgraph_bridges
subgraph_bridge_registry
subgraph_bridge_utility_mlu
@@ -48,6 +50,8 @@ set(mlu_subgraph_bridges
subgraph_bridge_layout_op_mlu
subgraph_bridge_argmax_op_mlu
subgraph_bridge_squeeze_op_mlu
subgraph_bridge_reshape_op_mlu
subgraph_bridge_flatten_op_mlu
CACHE INTERNAL "mlu_subgraph_bridges")
@@ -77,6 +81,8 @@ lite_cc_test(test_layout_converter_mlu SRCS layout_op_test.cc DEPS scope optimiz
lite_cc_test(test_cast_converter_mlu SRCS cast_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_argmax_converter_mlu SRCS argmax_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_squeeze_converter_mlu SRCS squeeze_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_reshape_converter_mlu SRCS reshape_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_flatten_converter_mlu SRCS flatten_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
if (LITE_BUILD_EXTRA)
lite_cc_test(test_lrn_converter_mlu SRCS lrn_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_gather_converter_mlu SRCS gather_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
int FlattenConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[MLU] Converting " + op_type + "...";
auto x_var_name = op_info->Input("X").front();
auto out_var_name = op_info->Output("Out").front();
auto x = scope->FindVar(x_var_name)->GetMutable<Tensor>();
auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
auto output_dims = output->dims().Vectorize();
// ================== Trans1: NHWC => NCHW ===========================
auto input_tensor = graph->GetNode(x_var_name);
std::vector<int> nhwc_to_nchw_axis = {0, 3, 1, 2};
auto trans1_out = graph->AddNode(x_var_name + ".trans.i",
x->dims().Vectorize(),
CNML_TENSOR,
CNML_NHWC,
graph->FPType());
cnmlBaseOp_t trans1_op{nullptr};
cnmlNdTransposeOpParam_t trans1_param{nullptr};
CNML_CALL(cnmlCreateNdTransposeOpParam(
&trans1_param, nhwc_to_nchw_axis.data(), nhwc_to_nchw_axis.size()));
CNML_CALL(cnmlCreateNdTransposeProOp(&trans1_op,
input_tensor->mlu_tensor(),
trans1_out->mlu_tensor(),
trans1_param));
// ======================== Trans1 End ==================================
// ======================= Flatten op ===================================
cnmlBaseOp_t flatten_op{nullptr};
auto trans2_input = graph->AddNode(out_var_name + ".trans.o",
output_dims,
CNML_TENSOR,
CNML_NHWC,
graph->FPType());
int cnml_trans2_input_shape[4];
CNML_CALL(
cnmlGetTensorShape(trans2_input->mlu_tensor(), cnml_trans2_input_shape));
cnmlReshapeOpParam_t reshape_param{nullptr};
CNML_CALL(
cnmlCreateNdReshapeOpParam(&reshape_param, cnml_trans2_input_shape, 4));
// Create the reshape op that performs the flatten on the NCHW data.
CNML_CALL(cnmlCreateReshapeOp(&flatten_op,
reshape_param,
trans1_out->mlu_tensor(),
trans2_input->mlu_tensor()));
// ======================= Flatten End ===================================
// ================== Trans2: NCHW => NHWC ===============================
std::vector<int> nchw_to_nhwc_axis = {0, 2, 3, 1};
auto output_tensor = graph->AddNode(
out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
cnmlBaseOp_t trans2_op{nullptr};
cnmlNdTransposeOpParam_t trans2_param{nullptr};
CNML_CALL(cnmlCreateNdTransposeOpParam(
&trans2_param, nchw_to_nhwc_axis.data(), nchw_to_nhwc_axis.size()));
CNML_CALL(cnmlCreateNdTransposeProOp(&trans2_op,
trans2_input->mlu_tensor(),
output_tensor->mlu_tensor(),
trans2_param));
// ======================== Trans2 End ==================================
// ============== DEBUG LOG ===============
VLOG(6) << "x_var_name: " << x_var_name;
VLOG(6) << "out_var_name: " << out_var_name;
VLOG(6) << "input dim: " << x->dims();
VLOG(6) << "output dim: " << output->dims();
int tmp_shape[4];
cnmlGetTensorShape(trans1_out->mlu_tensor(), tmp_shape);
VLOG(6) << "trans1_out shape"
<< ": " << tmp_shape[0] << " " << tmp_shape[1] << " " << tmp_shape[2]
<< " " << tmp_shape[3];
cnmlGetTensorShape(trans2_input->mlu_tensor(), tmp_shape);
VLOG(6) << "trans2_input shape"
<< ": " << tmp_shape[0] << " " << tmp_shape[1] << " " << tmp_shape[2]
<< " " << tmp_shape[3];
// ============== DEBUG END ===============
graph->FuseOp(trans1_op);
graph->FuseOp(flatten_op);
graph->FuseOp(trans2_op);
CNML_CALL(cnmlDestroyBaseOp(&trans1_op));
CNML_CALL(cnmlDestroyBaseOp(&flatten_op));
CNML_CALL(cnmlDestroyBaseOp(&trans2_op));
return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(flatten,
kMLU,
paddle::lite::subgraph::mlu::FlattenConverter);
REGISTER_SUBGRAPH_BRIDGE(flatten2,
kMLU,
paddle::lite::subgraph::mlu::FlattenConverter);
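The reshape target above comes from the flatten op's already-inferred output dims. A hedged sketch of that inference in plain C++ (InferFlattenDims is an illustrative helper; it assumes the usual flatten/flatten2 semantics of collapsing the dims before and after `axis`):

#include <cstdint>
#include <vector>

// out = {prod(dims[0:axis]), prod(dims[axis:])}
std::vector<int64_t> InferFlattenDims(const std::vector<int64_t>& dims,
                                      int axis) {
  int64_t outer = 1, inner = 1;
  for (size_t i = 0; i < dims.size(); ++i) {
    if (static_cast<int>(i) < axis) {
      outer *= dims[i];
    } else {
      inner *= dims[i];
    }
  }
  return {outer, inner};
}

// e.g. the test below: input {1, 2, 4, 4}, axis = 2  ->  output {2, 16}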
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/flatten_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
void test_flatten(std::vector<int64_t> input_shape, int axis) {
// prepare input&output variables
Scope scope;
std::string x_var_name("x");
std::string out_var_name("out");
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
x->Resize(input_shape);
Tensor x_cpu;
// initialize input&output data
FillTensor<float, int>(x);
x_cpu.CopyDataFrom(*x);
Tensor input_trans;
input_trans.Resize(input_shape);
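// the MLU converter consumes NHWC data, so feed the input transposed from NCHW to NHWC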
transpose(x->mutable_data<float>(),
input_trans.mutable_data<float>(),
{static_cast<int>(input_shape[0]),
static_cast<int>(input_shape[1]),
static_cast<int>(input_shape[2]),
static_cast<int>(input_shape[3])},
{0, 2, 3, 1});
x->CopyDataFrom(input_trans);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("flatten2");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr<int>("axis", axis);
auto op = CreateOp<operators::FlattenOp>(opdesc, &scope);
LaunchOp(op, {x_var_name}, {out_var_name});
// compare results
auto* out_data = out->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
EXPECT_NEAR(out_data[i], x_cpu.mutable_data<float>()[i], 1e-5);
}
}
TEST(MLUBridges, flatten) {
std::vector<int64_t> input_shape = {1, 2, 4, 4};
int axis = 2;
test_flatten(input_shape, axis);
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
USE_SUBGRAPH_BRIDGE(flatten, kMLU);
USE_SUBGRAPH_BRIDGE(flatten2, kMLU);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[MLU] Converting " + op_type + "...";
auto x_var_name = op_info->Input("X").front();
auto out_var_name = op_info->Output("Out").front();
auto x = scope->FindVar(x_var_name)->GetMutable<Tensor>();
auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
auto output_dims = output->dims().Vectorize();
// ================== Trans1: NHWC => NCHW ===========================
auto input_tensor = graph->GetNode(x_var_name);
std::vector<int> nhwc_to_nchw_axis = {0, 3, 1, 2};
auto trans1_out = graph->AddNode(x_var_name + ".trans.i",
x->dims().Vectorize(),
CNML_TENSOR,
CNML_NHWC,
graph->FPType());
cnmlBaseOp_t trans1_op{nullptr};
cnmlNdTransposeOpParam_t trans1_param{nullptr};
CNML_CALL(cnmlCreateNdTransposeOpParam(
&trans1_param, nhwc_to_nchw_axis.data(), nhwc_to_nchw_axis.size()));
CNML_CALL(cnmlCreateNdTransposeProOp(&trans1_op,
input_tensor->mlu_tensor(),
trans1_out->mlu_tensor(),
trans1_param));
// ======================== Trans1 End ==================================
// ======================= Reshape op ===================================
cnmlBaseOp_t reshape_op{nullptr};
auto trans2_input = graph->AddNode(out_var_name + ".trans.o",
output_dims,
CNML_TENSOR,
CNML_NHWC,
graph->FPType());
cnmlReshapeOpParam_t reshape_param{nullptr};
int cnml_trans2_input_shape[4];
CNML_CALL(
cnmlGetTensorShape(trans2_input->mlu_tensor(), cnml_trans2_input_shape));
CNML_CALL(
cnmlCreateNdReshapeOpParam(&reshape_param, cnml_trans2_input_shape, 4));
// Create the reshape op operating on the NCHW data.
CNML_CALL(cnmlCreateReshapeOp(&reshape_op,
reshape_param,
trans1_out->mlu_tensor(),
trans2_input->mlu_tensor()));
// ======================= Reshape op End ===================================
// ================== Trans2: NCHW => NHWC ===============================
std::vector<int> nchw_to_nhwc_axis = {0, 2, 3, 1};
auto output_tensor = graph->AddNode(
out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
cnmlBaseOp_t trans2_op{nullptr};
cnmlNdTransposeOpParam_t trans2_param{nullptr};
CNML_CALL(cnmlCreateNdTransposeOpParam(
&trans2_param, nchw_to_nhwc_axis.data(), nchw_to_nhwc_axis.size()));
CNML_CALL(cnmlCreateNdTransposeProOp(&trans2_op,
trans2_input->mlu_tensor(),
output_tensor->mlu_tensor(),
trans2_param));
// ======================== Trans2 End ==================================
// =============== DEBUG ====================
VLOG(6) << "x_var_name: " << x_var_name;
VLOG(6) << "out_var_name: " << out_var_name;
VLOG(6) << "input dim: " << x->dims();
VLOG(6) << "output dim: " << output->dims();
int cnml_input_shape[4];
CNML_CALL(cnmlGetTensorShape(input_tensor->mlu_tensor(), cnml_input_shape));
VLOG(6) << "cnml input dim: ";
for (size_t i = 0; i < 4; i++) {
VLOG(6) << cnml_input_shape[i];
}
int tmp_shape[4];
cnmlGetTensorShape(trans1_out->mlu_tensor(), tmp_shape);
VLOG(6) << "trans1_out shape"
<< ": " << tmp_shape[0] << " " << tmp_shape[1] << " " << tmp_shape[2]
<< " " << tmp_shape[3];
cnmlGetTensorShape(trans2_input->mlu_tensor(), tmp_shape);
VLOG(6) << "trans2_input shape"
<< ": " << tmp_shape[0] << " " << tmp_shape[1] << " " << tmp_shape[2]
<< " " << tmp_shape[3];
// =============== DEBUG END =================
// CNML_CALL(cnmlCreateReshapeOp_V2(
// &reshape_op,
// input_tensor->mlu_tensor(),
// output_tensor->mlu_tensor()));
graph->FuseOp(trans1_op);
graph->FuseOp(reshape_op);
graph->FuseOp(trans2_op);
CNML_CALL(cnmlDestroyBaseOp(&trans1_op));
CNML_CALL(cnmlDestroyBaseOp(&reshape_op));
CNML_CALL(cnmlDestroyBaseOp(&trans2_op));
return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(reshape,
kMLU,
paddle::lite::subgraph::mlu::ReshapeConverter);
REGISTER_SUBGRAPH_BRIDGE(reshape2,
kMLU,
paddle::lite::subgraph::mlu::ReshapeConverter);
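As with flatten, the converter reshapes to the dims the reshape op has already inferred from its `shape` attribute. A hedged sketch of that inference (plain C++; InferReshapeDims is an illustrative helper, assuming Paddle's conventions where a 0 entry copies the input dim at the same index and a single -1 entry is inferred from the remaining element count):

#include <cstdint>
#include <vector>

std::vector<int64_t> InferReshapeDims(const std::vector<int64_t>& in_dims,
                                      const std::vector<int>& shape) {
  int64_t in_count = 1;
  for (int64_t d : in_dims) in_count *= d;

  std::vector<int64_t> out_dims(shape.size());
  int64_t known = 1;
  int infer_idx = -1;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == -1) {
      infer_idx = static_cast<int>(i);
    } else if (shape[i] == 0) {
      out_dims[i] = in_dims[i];  // copy the corresponding input dim
      known *= out_dims[i];
    } else {
      out_dims[i] = shape[i];
      known *= out_dims[i];
    }
  }
  if (infer_idx >= 0) out_dims[infer_idx] = in_count / known;
  return out_dims;
}

// e.g. the test below: input {1, 2, 4, 4}, shape {1, 4, 2, 4} -> {1, 4, 2, 4}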
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/reshape_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
void test_reshape(std::vector<int64_t> input_shape,
std::vector<int64_t> out_shape) {
// prepare input&output variables
Scope scope;
std::string x_var_name("x");
std::string out_var_name("out");
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
x->Resize(input_shape);
Tensor x_cpu;
// initialize input&output data
FillTensor<float, int>(x);
x_cpu.CopyDataFrom(*x);
Tensor input_trans;
input_trans.Resize(input_shape);
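// the MLU converter consumes NHWC data, so feed the input transposed from NCHW to NHWC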
transpose(x->mutable_data<float>(),
input_trans.mutable_data<float>(),
{static_cast<int>(input_shape[0]),
static_cast<int>(input_shape[1]),
static_cast<int>(input_shape[2]),
static_cast<int>(input_shape[3])},
{0, 2, 3, 1});
x->CopyDataFrom(input_trans);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("reshape2");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
std::vector<int> shape_attr;
shape_attr.resize(out_shape.size());
for (size_t i = 0; i < out_shape.size(); i++) {
shape_attr[i] = static_cast<int>(out_shape[i]);
}
opdesc.SetAttr<std::vector<int>>("shape", shape_attr);
auto op = CreateOp<operators::ReshapeOp>(opdesc, &scope);
out->Resize(out_shape);
LaunchOp(op, {x_var_name}, {out_var_name});
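// transpose the MLU output back (NHWC -> NCHW) before comparing with the CPU reference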
Tensor out_trans;
out_trans.Resize(out_shape);
transpose(out->mutable_data<float>(),
out_trans.mutable_data<float>(),
{static_cast<int>(out_shape[0]),
static_cast<int>(out_shape[1]),
static_cast<int>(out_shape[2]),
static_cast<int>(out_shape[3])},
{0, 3, 1, 2});
out->CopyDataFrom(out_trans);
// compare results
auto* out_data = out->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
EXPECT_NEAR(out_data[i], x_cpu.mutable_data<float>()[i], 1e-5);
}
}
TEST(MLUBridges, reshape) {
std::vector<int64_t> input_shape = {1, 2, 4, 4};
std::vector<int64_t> out_shape = {1, 4, 2, 4};
test_reshape(input_shape, out_shape);
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
USE_SUBGRAPH_BRIDGE(reshape, kMLU);
USE_SUBGRAPH_BRIDGE(reshape2, kMLU);