// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/subgraph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <string>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/paddle_use_bridges.h"
#include "lite/kernels/bm/bridges/utility.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

bool SubgraphEngine::BuildDeviceProgram() {
32 33 34 35 36
  int status = 0;
  subgraph::bm::Graph graph;
  const auto& bridges = subgraph::Registry::Instance();
  graph.CreateCompilerHandle();
  auto& ctx = this->ctx_->template As<BMContext>();
37
  if (!origin_program_) {
38 39
    BuildOriginProgram();
  }
40 41
  const auto& insts = origin_program_->instructions(kRootBlockIdx);
  for (auto& inst : insts) {
42
    auto op = const_cast<OpLite*>(inst.op());
43 44 45 46
    CHECK(op);
    op->CheckShape();
    op->InferShape();
    std::string op_type = op->op_info()->Type();
47
    LOG(INFO) << op_type;
48
    if (!bridges.Exists(op_type, TARGET(kBM))) {
49
      return false;
50 51 52 53 54 55 56
    }
    auto kernel = inst.kernel();
    status |=
        bridges.Select(op_type, TARGET(kBM))(reinterpret_cast<void*>(&graph),
                                             const_cast<OpLite*>(op),
                                             const_cast<KernelBase*>(kernel));
    if (subgraph::CHECK_FAILED(status)) {
57
      return false;
58 59
    }
  }
60 61
  std::string net_name = "bmnet_f32bmodel";
  auto unique_net_name = lite::subgraph::bm::UniqueName(net_name);
62
  __bmcompile_opt(
63
      graph.GetCompilerHandle(), const_cast<char*>(unique_net_name.c_str()), 2);
64 65 66 67
  void* bmodel_data = nullptr;
  unsigned int data_size = 0;
  bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
  finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
68
  graph.UnlockCompilerMutex();
69 70
  bmrt_hd_ = bmrt_create(bm_hd_);
  if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
71
    return false;
72 73 74 75 76 77 78
  }
  bmrt_get_network_names(bmrt_hd_, &net_names_);
  net_info_ = bmrt_get_network_info(bmrt_hd_, net_names_[0]);
  auto& stage = net_info_->stages[0];
  // input
  device_inputs_.resize(input_names_.size());
  for (size_t i = 0; i < input_names_.size(); i++) {
79 80
    origin_itensors_[i] =
        exec_scope_->FindMutableTensor(net_info_->input_names[i]);
81 82 83 84 85 86 87 88 89 90 91 92 93
    CHECK(origin_itensors_[i]);
    bm_device_mem_t* p_mem =
        static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
    CHECK(p_mem != nullptr);
    CHECK_EQ(bm_malloc_device_byte(
                 bm_hd_, p_mem, origin_itensors_[i]->memory_size()),
             BM_SUCCESS);
    bmrt_tensor_with_device(&device_inputs_[i],
                            *p_mem,
                            net_info_->input_dtypes[i],
                            stage.input_shapes[i]);
  }
  // output
94 95 96 97 98 99 100
  device_outputs_.resize(net_info_->output_num);
  int out_index = 0;
  for (int i = 0; i < output_names_.size(); i++) {
    outname_map_.insert(std::pair<std::string, int>(output_names_[i], i));
  }

  for (int i = 0; i < net_info_->output_num; i++) {
101
    Tensor* t_cur = exec_scope_->FindMutableTensor(net_info_->output_names[i]);
102
    CHECK(t_cur != nullptr);
103 104 105
    bm_device_mem_t* p_mem =
        static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
    CHECK(p_mem != nullptr);
106 107 108 109 110 111 112 113
    if (outname_map_.find(net_info_->output_names[i]) != outname_map_.end()) {
      origin_otensors_[out_index] = t_cur;
      origin_otensors_[out_index]->mutable_data<float>();
      out_index += 1;
    }
    CHECK_EQ(
        bm_malloc_device_byte(bm_hd_, p_mem, net_info_->max_output_bytes[i]),
        BM_SUCCESS);
114 115 116 117 118
    bmrt_tensor_with_device(&device_outputs_[i],
                            *p_mem,
                            net_info_->output_dtypes[i],
                            stage.output_shapes[i]);
  }
119
  return true;
120 121
}

bool SubgraphEngine::LaunchDeviceProgram() {
123 124 125 126 127 128 129 130 131 132 133 134 135 136
  for (size_t i = 0; i < device_inputs_.size(); i++) {
    bm_memcpy_s2d(bm_hd_,
                  device_inputs_[i].device_mem,
                  const_cast<void*>(origin_itensors_[i]->raw_data()));
  }
  bmrt_launch_tensor_ex(bmrt_hd_,
                        net_names_[0],
                        static_cast<const bm_tensor_t*>(&device_inputs_[0]),
                        net_info_->input_num,
                        static_cast<bm_tensor_t*>(&device_outputs_[0]),
                        net_info_->output_num,
                        true,
                        false);
  bm_thread_sync(bm_hd_);
137
  int out_index = 0;
138
  for (size_t i = 0; i < device_outputs_.size(); i++) {
139 140 141 142 143 144
    if (outname_map_.find(net_info_->output_names[i]) != outname_map_.end()) {
      bm_memcpy_d2s(bm_hd_,
                    const_cast<void*>(origin_otensors_[out_index]->raw_data()),
                    device_outputs_[i].device_mem);
      out_index++;
    }
145
  }
146
  return true;
147 148 149 150 151
}

void SubgraphCompute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  engine_.reset(new SubgraphEngine(ctx_.get(),
152 153 154
                                   param.block_idx,
                                   param.program_desc,
                                   param.exec_scope,
155
                                   param.input_data_names,
156
                                   param.output_data_names));
157 158 159 160 161
  CHECK(engine_);
}

// Executes the subgraph by delegating to the engine; aborts through CHECK if
// engine_ is not set (PrepareForRun is the only place in this file that sets
// it).
void SubgraphCompute::Run() {
  CHECK(engine_);
  engine_->Run();
}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

// Registers paddle::lite::kernels::bm::SubgraphCompute as the `subgraph`
// kernel with the registration arguments kBM / kFloat / kNCHW / def
// (presumably target, precision, layout and alias -- confirm against the
// REGISTER_LITE_KERNEL definition). Both the "Inputs" and the "Outputs" slot
// are bound to tensor types with TARGET(kHost).
REGISTER_LITE_KERNEL(subgraph,
                     kBM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::bm::SubgraphCompute,
                     def)
    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();