// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/backends/npu/builder.h"
#include "lite/kernels/npu/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {

node_map_type FCConverter(const std::shared_ptr<lite::OpLite> fc_op,
                          const node_map_type& inputs_map) {
  auto scope = fc_op->scope();
  auto op_info = fc_op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_type = lite::npu::UniqueName(op_type);
  LOG(INFO) << "[NPU] Converting " + op_type + "...";

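  // Create the graph-engine FullConnection node that will perform the
  // matrix multiplication on the NPU.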
  auto fc_node = std::make_shared<ge::op::FullConnection>(unique_op_type);

  auto x_var_name = op_info->Input("Input").front();
  auto w_var_name = op_info->Input("W").front();

  int in_num_col_dims = op_info->GetAttr<int>("in_num_col_dims");
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto w = scope->FindVar(w_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  auto w_dims = w->dims();

  CHECK_GE(x_dims.size(), 2UL);
  CHECK_EQ(w_dims.size(), 2UL);

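  // fc computes Out(m, n) = X'(m, k) * W(k, n) + b(n), where X' is x with its
  // first in_num_col_dims dimensions collapsed into m and the remaining ones
  // into k. E.g. x_dims = (2, 3, 4) with in_num_col_dims = 1 gives m = 2,
  // k = 12.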
  int m = x_dims.Slice(0, in_num_col_dims).production();
  int k = x_dims.Slice(in_num_col_dims, x_dims.size()).production();
  int n = w_dims[1];
  CHECK_EQ(k * n, w_dims.production());
  VLOG(3) << "[NPU] x dims: " << x_dims << " w dims: " << w_dims << " m: " << m
          << " k: " << k << " n: " << n;

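  // x must be fed by an upstream node, while w is expected to be a constant
  // weight tensor rather than the output of another node.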
  CHECK(inputs_map.count(x_var_name));
  CHECK(!inputs_map.count(w_var_name));

  // reshape x to (m, k, 1, 1), since FullConnection expects a 4-D input
  auto reshaped_x_node =
      std::make_shared<ge::op::Reshape>(x_var_name + "_reshape");
  reshaped_x_node->set_input_tensor(*inputs_map.at(x_var_name));
  reshaped_x_node->set_attr_shape({m, k, 1, 1});
  reshaped_x_node->set_attr_axis(0);
  fc_node->set_input_x(*reshaped_x_node);
  lite::npu::OpList::Global().add(inputs_map.at(x_var_name));
  lite::npu::OpList::Global().add(reshaped_x_node);

  // create w const node, set its shape to (n, k, 1, 1) and fill it with
  // the transposed w tensor, matching the weight layout FullConnection expects
  auto w_const_node = std::make_shared<ge::op::Const>(w_var_name);
  ge::TensorDesc w_const_desc(
      ge::Shape({n, k, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
  ge::TensorPtr w_const_tensor = std::make_shared<ge::Tensor>();
  w_const_tensor->SetTensorDesc(w_const_desc);
  auto w_data = w->mutable_data<float>();
  std::vector<float> transposed_w_data(w_dims.production());
  for (int i = 0; i < k; i++) {
    for (int j = 0; j < n; j++) {
      transposed_w_data[j * k + i] = w_data[i * n + j];
    }
  }
  w_const_tensor->SetData(reinterpret_cast<uint8_t*>(transposed_w_data.data()),
                          transposed_w_data.size() * sizeof(float));
  w_const_node->set_attr_value(w_const_tensor);
  fc_node->set_input_w(*w_const_node);
  lite::npu::OpList::Global().add(w_const_node);

  // add bias node if bias tensor exists
  if (lite::npu::HasInputArg(op_info, scope, "Bias")) {
    auto bias_var_name = op_info->Input("Bias").front();
    auto bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
    auto bias_dims = bias->dims();
    CHECK(!inputs_map.count(bias_var_name));
    CHECK_EQ(bias_dims.production(), n);

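    // wrap bias in a const node with shape (1, n, 1, 1) so it broadcasts
    // across the m rows of the output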
    auto bias_const_node = std::make_shared<ge::op::Const>(bias_var_name);
    bias_const_node->set_attr_value(lite::npu::CvtTensor(bias, {1, n, 1, 1}));
    fc_node->set_input_b(*bias_const_node);
    lite::npu::OpList::Global().add(bias_const_node);
  }
  lite::npu::OpList::Global().add(fc_node);

  // reshape output of fc_node from (m, n, 1, 1) to (m, n)
  auto reshaped_fc_node =
      std::make_shared<ge::op::Reshape>(unique_op_type + "_reshape");
  reshaped_fc_node->set_input_tensor(*fc_node);
  reshaped_fc_node->set_attr_shape({m, n});
  reshaped_fc_node->set_attr_axis(0);
  lite::npu::OpList::Global().add(reshaped_fc_node);

  node_map_type outputs_map;
  outputs_map[op_info->Output("Out").front()] = reshaped_fc_node;
  return outputs_map;
}

}  // namespace bridges
}  // namespace npu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

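// Register the converter so the NPU bridge registry can look it up by the
// op type "fc" when partitioning subgraphs for the NPU.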
REGISTER_NPU_BRIDGE(fc, paddle::lite::kernels::npu::bridges::FCConverter);