// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/apu/subgraph_compute.h"
#include <dlfcn.h>
#include <sys/time.h>
#include <time.h>
#include <utility>
#include "lite/backends/apu/device.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/apu/bridges/graph.h"
#include "lite/kernels/apu/bridges/paddle_use_bridges.h"
#include "lite/kernels/apu/bridges/utility.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace apu {

bool SubgraphEngine::BuildDeviceProgram() {
32 33 34 35
  if (!origin_program_) {
    BuildOriginProgram();
  }

36 37 38 39 40 41 42 43
  auto GetCurrentUS = []() -> double {
    struct timeval time;
    gettimeofday(&time, NULL);
    return 1e+6 * time.tv_sec + time.tv_usec;
  };

  auto start_time = GetCurrentUS();

H
hong19860320 已提交
44
  unsigned int version;
45
  Neuron_getVersion(&version);
H
hong19860320 已提交
46 47 48 49
  VLOG(3) << "Neuron Adapter version: " << version;

  int status = 0;
  subgraph::apu::Graph graph;
50
  int neuron_errCode = NeuronModel_create(&model_);
H
hong19860320 已提交
51
  if (NEURON_NO_ERROR != neuron_errCode) {
52
    LOG(WARNING) << "[APU] Failed to create the neuron model!";
53
    return false;
H
hong19860320 已提交
54 55 56 57 58 59 60
  }
  graph.set_model(model_);
  graph.set_input_names(input_names_);
  graph.set_output_names(output_names_);

  // Convert all of ops and their input vars and weights and added into the APU
  // NIR graph
61
  const auto& bridges = subgraph::SubgraphBridgeRegistry::Instance();
62 63
  const auto& insts = origin_program_->instructions(kRootBlockIdx);
  for (auto& inst : insts) {
H
hong19860320 已提交
64 65 66 67 68 69
    auto op = const_cast<OpLite*>(inst.op());
    CHECK(op);
    op->CheckShape();
    op->InferShape();
    std::string op_type = op->op_info()->Type();
    if (!bridges.Exists(op_type, TARGET(kAPU))) {
70
      return false;
H
hong19860320 已提交
71 72 73 74 75 76 77 78
    }

    auto kernel = inst.kernel();
    status |=
        bridges.Select(op_type, TARGET(kAPU))(reinterpret_cast<void*>(&graph),
                                              const_cast<OpLite*>(op),
                                              const_cast<KernelBase*>(kernel));
    if (subgraph::CHECK_FAILED(status)) {
79
      return false;
H
hong19860320 已提交
80 81 82
    }
  }

83 84
  // Get the index of input tensors
  std::vector<uint32_t> input_indices;
H
hong19860320 已提交
85
  for (int i = 0; i < input_names_.size(); i++) {
86 87 88 89 90 91
    CHECK(graph.Has(input_names_[i])) << "[APU] Failed to find input node "
                                      << input_names_[i];
    auto index = graph.Get(input_names_[i])->index();
    input_indices.push_back(index);
    VLOG(3) << "[APU] Input[" << i << "] name " << input_names_[i] << " dims "
            << origin_itensors_[i]->dims() << " index " << index;
H
hong19860320 已提交
92 93
  }

94 95
  // Get the index of output tensors
  std::vector<uint32_t> output_indices;
H
hong19860320 已提交
96
  for (int i = 0; i < output_names_.size(); i++) {
97 98
    CHECK(graph.Has(output_names_[i])) << "[APU] Failed to find output node "
                                       << output_names_[i];
H
hong19860320 已提交
99
    origin_otensors_[i]->mutable_data<int8_t>();
100 101 102 103
    auto index = graph.Get(output_names_[i])->index();
    output_indices.push_back(index);
    VLOG(3) << "[APU] Output[" << i << "] name " << output_names_[i] << " dims "
            << origin_otensors_[i]->dims() << " index " << index;
H
hong19860320 已提交
104 105
  }

106 107 108 109 110 111
  // Indentify the input and output tensors of the neuron model
  NeuronModel_identifyInputsAndOutputs(model_,
                                       input_indices.size(),
                                       &input_indices[0],
                                       output_indices.size(),
                                       &output_indices[0]);
112
  neuron_errCode = NeuronModel_finish(model_);
H
hong19860320 已提交
113
  if (NEURON_NO_ERROR != neuron_errCode) {
114
    LOG(WARNING) << "[APU] Fail to create NIR model:" << neuron_errCode;
115
    return false;
H
hong19860320 已提交
116 117 118
  }
  VLOG(3) << "[APU] APU NIR model created!";

119 120 121 122
  VLOG(1) << "[APU] APU NIR model created, Create cost "
          << GetCurrentUS() - start_time << " us";

  start_time = GetCurrentUS();
123
  compilation_ = lite::apu::Device::Global().Build(model_);
H
hong19860320 已提交
124 125
  if (compilation_ == nullptr) {
    LOG(WARNING) << "[APU] Build APU DLA model failed!";
126
    return false;
H
hong19860320 已提交
127
  }
128
  VLOG(1) << "[APU] APU DLA model created, Build cost "
H
hong19860320 已提交
129
          << GetCurrentUS() - start_time << " us";
130
  return true;
H
hong19860320 已提交
131 132
}

bool SubgraphEngine::LaunchDeviceProgram() {
H
hong19860320 已提交
134 135 136 137 138 139 140
  auto GetCurrentUS = []() -> double {
    struct timeval time;
    gettimeofday(&time, NULL);
    return 1e+6 * time.tv_sec + time.tv_usec;
  };

  auto start_time = GetCurrentUS();
141 142
  NeuronExecution* run = NULL;
  int neuron_errCode = NeuronExecution_create(compilation_, &run);
H
hong19860320 已提交
143 144
  if (NEURON_NO_ERROR != neuron_errCode) {
    LOG(WARNING) << "[APU] Build APU runtime failed!";
145
    return false;
H
hong19860320 已提交
146 147 148 149
  }

  // Set input buffer
  for (size_t i = 0; i < origin_itensors_.size(); i++) {
150 151
    auto origin_data = origin_itensors_[i]->mutable_data<int8_t>();
    auto converted_data = reinterpret_cast<uint8_t*>(origin_data);
H
hong19860320 已提交
152
    for (int j = 0; j < origin_itensors_[i]->data_size(); j++) {
153 154
      converted_data[j] =
          static_cast<uint8_t>(static_cast<int16_t>(origin_data[j]) + 128);
H
hong19860320 已提交
155
    }
156
    NeuronExecution_setInput(
157
        run, i, NULL, converted_data, origin_itensors_[i]->memory_size());
H
hong19860320 已提交
158 159 160 161
  }

  // Set output buffer
  for (size_t i = 0; i < origin_otensors_.size(); i++) {
162 163
    NeuronExecution_setOutput(
        run,
H
hong19860320 已提交
164 165 166 167 168 169
        i,
        NULL,
        reinterpret_cast<void*>(origin_otensors_[i]->raw_data()),
        origin_otensors_[i]->memory_size());
  }

170
  neuron_errCode = NeuronExecution_compute(run);
H
hong19860320 已提交
171 172
  if (NEURON_NO_ERROR != neuron_errCode) {
    LOG(WARNING) << "Fail to run execution!" << neuron_errCode;
173
    return false;
H
hong19860320 已提交
174 175 176
  }

  for (size_t i = 0; i < origin_otensors_.size(); i++) {
177 178
    auto converted_data = origin_otensors_[i]->mutable_data<int8_t>();
    auto origin_data = reinterpret_cast<uint8_t*>(converted_data);
H
hong19860320 已提交
179
    for (int j = 0; j < origin_otensors_[i]->data_size(); j++) {
180 181
      converted_data[j] =
          static_cast<int8_t>(static_cast<int16_t>(origin_data[j]) - 128);
H
hong19860320 已提交
182 183
    }
  }
184
  NeuronExecution_free(run);
185
  VLOG(1) << "[APU] Process cost " << GetCurrentUS() - start_time << " us";
186
  return true;
H
hong19860320 已提交
187 188
}

// Releases the device resources owned by the engine: the compiled DLA
// program first, then the NIR model it was built from. Both pointers may be
// null if BuildDeviceProgram() failed early or was never called.
SubgraphEngine::~SubgraphEngine() {
  if (compilation_) {
    NeuronCompilation_free(compilation_);
  }
  if (model_) {
    NeuronModel_free(model_);
  }
}

void SubgraphCompute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  engine_.reset(new SubgraphEngine(ctx_.get(),
201 202 203
                                   param.block_idx,
                                   param.program_desc,
                                   param.exec_scope,
H
hong19860320 已提交
204
                                   param.input_data_names,
205
                                   param.output_data_names));
H
hong19860320 已提交
206 207 208 209 210
  CHECK(engine_);
}

// Delegates execution to the engine created in PrepareForRun(). (Original
// span contained pasted viewer artifacts that broke compilation; removed.)
void SubgraphCompute::Run() {
  CHECK(engine_);
  engine_->Run();
}

}  // namespace apu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

// Register the APU subgraph kernel: int8 precision, NCHW layout, with the
// subgraph's input/output tensors resident on the host target.
REGISTER_LITE_KERNEL(subgraph,
                     kAPU,
                     kInt8,
                     kNCHW,
                     paddle::lite::kernels::apu::SubgraphCompute,
                     def)
    .BindInput("Inputs",
               {LiteType::GetTensorTy(TARGET(kHost),
                                      PRECISION(kInt8),
                                      DATALAYOUT(kNCHW))})
    .BindOutput("Outputs",
                {LiteType::GetTensorTy(TARGET(kHost),
                                       PRECISION(kInt8),
                                       DATALAYOUT(kNCHW))})
    .Finalize();