Commit df401f07 authored by pmshst, committed by jackzhang235

add transpose op
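Register the transpose and transpose2 ops as MLU subgraph bridges: the new TransposeConverter creates a CNML N-D transpose op via cnmlCreateNdTransposeOpParam / cnmlCreateNdTransposeProOp, permutations shorter than four entries are padded up to 4-D, the bridge library is wired into CMake, and a converter unit test is added.

For illustration, a minimal standalone sketch of the axis-padding rule the converter relies on (PadAxisTo4D is an illustrative name, not part of this patch):

    #include <iostream>
    #include <vector>

    // Same padding rule as axis_to_4d() in the converter: a permutation
    // shorter than four entries is extended with the identity mapping on
    // the trailing dimensions.
    std::vector<int> PadAxisTo4D(std::vector<int> axis) {
      if (axis.size() >= 4) return axis;
      std::vector<int> padded = {0, 1, 2, 3};
      for (size_t i = 0; i < axis.size(); i++) padded[i] = axis[i];
      return padded;
    }

    int main() {
      for (int v : PadAxisTo4D({0, 2, 1})) std::cout << v << ' ';
      std::cout << '\n';  // prints: 0 2 1 3
      return 0;
    }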

Parent 6ed30885
@@ -18,6 +18,7 @@ lite_cc_library(subgraph_bridge_fc_op_mlu SRCS fc_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_scale_op_mlu SRCS scale_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_interp_op_mlu SRCS interpolate_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_concat_op_mlu SRCS concat_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_transpose_op_mlu SRCS transpose_op.cc DEPS ${subgraph_bridge_deps_mlu})
set(mlu_subgraph_bridges
subgraph_bridge_registry
subgraph_bridge_utility_mlu
@@ -28,6 +29,7 @@ set(mlu_subgraph_bridges
subgraph_bridge_pool_op_mlu
subgraph_bridge_softmax_op_mlu
subgraph_bridge_fc_op_mlu
subgraph_bridge_transpose_op_mlu
subgraph_bridge_batch_norm_op_mlu
subgraph_bridge_scale_op_mlu
subgraph_bridge_interp_op_mlu
@@ -45,5 +47,5 @@ lite_cc_test(test_fc_converter_mlu SRCS fc_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_scale_converter_mlu SRCS scale_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_interp_converter_mlu SRCS interpolate_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_concat_converter_mlu SRCS concat_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
message(STATUS "+++++ mlu_subgraph_bridges: ${mlu_subgraph_bridges}")
@@ -24,3 +24,5 @@ USE_SUBGRAPH_BRIDGE(batch_norm, kMLU);
USE_SUBGRAPH_BRIDGE(fc, kMLU);
USE_SUBGRAPH_BRIDGE(nearest_interp, kMLU);
USE_SUBGRAPH_BRIDGE(leaky_relu, kMLU);
USE_SUBGRAPH_BRIDGE(transpose, kMLU);
USE_SUBGRAPH_BRIDGE(transpose2, kMLU);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
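// Pad a permutation with fewer than four entries up to 4-D by extending
// it with the identity mapping on the trailing dimensions, e.g. {0, 2, 1}
// becomes {0, 2, 1, 3}. CNML tensors on this path are 4-D (NHWC), so
// shorter permutations must be widened before use.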
std::vector<int> axis_to_4d(std::vector<int> axis) {
  if (axis.size() >= 4) {
    return axis;
  }
  std::vector<int> new_axis = {0, 1, 2, 3};
  for (size_t i = 0; i < axis.size(); i++) {
    new_axis[i] = axis[i];
  }
  return new_axis;
}
int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[MLU] Converting " + op_type + "...";

  // Get input and output vars and op attributes.
  auto x_var_name = op_info->Input("X").front();
  auto out_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
  auto output_dims = output->dims().Vectorize();
  auto axis = op_info->GetAttr<std::vector<int>>("axis");
  // Pad the permutation to 4-D before creating the CNML transpose parameter.
  auto axis_4d = axis_to_4d(axis);

  auto output_tensor = graph->AddNode(
      out_var_name, output_dims, CNML_TENSOR, CNML_NHWC, graph->FPType());
  CHECK(graph->HasNode(x_var_name));
  auto input_tensor = graph->GetNode(x_var_name);

  cnmlBaseOp_t transpose_op{nullptr};
  cnmlNdTransposeOpParam_t transpose_param{nullptr};
  CNML_CALL(cnmlCreateNdTransposeOpParam(
      &transpose_param, axis_4d.data(), axis_4d.size()));
  // Create the transpose op from the N-D permutation parameter.
  CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op,
                                       input_tensor->mlu_tensor(),
                                       output_tensor->mlu_tensor(),
                                       transpose_param));
  graph->FuseOp(transpose_op);
  return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(transpose,
                         kMLU,
                         paddle::lite::subgraph::mlu::TransposeConverter);
REGISTER_SUBGRAPH_BRIDGE(transpose2,
                         kMLU,
                         paddle::lite::subgraph::mlu::TransposeConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/transpose_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
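// Compute the row-major linear offset of a 4-D position {n, c, h, w}
// within a tensor of the given dims.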
int data_index(std::vector<int> pos, DDimLite dims) {
  int d1 = dims[1];
  int d2 = dims[2];
  int d3 = dims[3];
  return pos[3] + pos[2] * d3 + pos[1] * d3 * d2 + pos[0] * d3 * d2 * d1;
}
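// Map an input position to its output position under the given
// permutation: out_pos[axis[i]] = in_pos[i]. For example, with
// axis = {0, 1, 3, 2}, the input position {n, c, h, w} maps to the
// output position {n, c, w, h}.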
std::vector<int> pos_trans(std::vector<int> in_pos, std::vector<int> axis) {
  std::vector<int> out_pos(in_pos.size());
  for (size_t i = 0; i < axis.size(); i++) {
    out_pos[axis[i]] = in_pos[i];
  }
  return out_pos;
}
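// CPU reference implementation of a 4-D transpose: walks every input
// position, maps it through pos_trans, and copies the element to the
// corresponding output offset. Used to produce the expected result for
// the MLU bridge test.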
template <typename dtype>
void transpose_ref(const std::shared_ptr<operators::TransposeOp>& op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto input =
      scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
  auto output =
      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
  auto x_dims = input->dims();
  auto y_dims = output->dims();
  auto axis = op_info->GetAttr<std::vector<int>>("axis");

  auto* input_data = input->mutable_data<dtype>();
  auto* output_data = output->mutable_data<dtype>();

  int input_n = x_dims[0];
  int input_c = x_dims[1];
  int input_h = x_dims[2];
  int input_w = x_dims[3];
  for (int n = 0; n < input_n; ++n) {
    for (int c = 0; c < input_c; ++c) {
      for (int h = 0; h < input_h; ++h) {
        for (int w = 0; w < input_w; ++w) {
          std::vector<int> in_pos{n, c, h, w};
          std::vector<int> out_pos = pos_trans(in_pos, axis);
          int in_index = data_index(in_pos, x_dims);
          int out_index = data_index(out_pos, y_dims);
          output_data[out_index] = input_data[in_index];
        }
      }
    }
  }
}
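// Build a transpose op from an OpDesc, run the CPU reference and the
// MLU bridge on the same randomly filled input, and compare the two
// outputs element-wise.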
void test_transpose(const std::vector<int64_t>& input_shape,
                    std::vector<int> axis) {
  // Prepare input and output variables.
  Scope scope;
  std::string x_var_name = "x";
  std::string out_var_name = "out";
  std::string out_ref_var_name = "out_ref";
  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
  x->Resize(input_shape);

  // Initialize input data.
  FillTensor<float>(x);

  // Initialize op desc.
  cpp::OpDesc opdesc;
  opdesc.SetType("transpose");
  opdesc.SetInput("X", {x_var_name});
  opdesc.SetOutput("Out", {out_var_name});
  opdesc.SetAttr("axis", axis);

  // Create the op, then convert and run it on the MLU.
  auto op = CreateOp<operators::TransposeOp>(opdesc, &scope);
  // transpose_ref must run before LaunchOp; otherwise the reference
  // implementation fails with a "Cannot access memory" error.
  // Execute the reference implementation and save its result to out_ref.
  transpose_ref<float>(op);
  out_ref->CopyDataFrom(*out);
  LaunchOp(op, {x_var_name}, {out_var_name});

  // Compare the MLU result against the reference result.
  auto* out_data = out->mutable_data<float>();
  auto* out_ref_data = out_ref->mutable_data<float>();
  for (int64_t i = 0; i < out->dims().production(); i++) {
    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
  }
}
TEST(MLUBridges, transpose) {
  std::vector<int64_t> input_shape = {2, 3, 4, 5};
  test_transpose(input_shape, std::vector<int>{0, 1, 3, 2});
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
USE_SUBGRAPH_BRIDGE(transpose, kMLU);
USE_SUBGRAPH_BRIDGE(transpose2, kMLU);