// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/subgraph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <string>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/paddle_use_bridges.h"
#include "lite/kernels/bm/bridges/utility.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

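// Builds the device-side program for this subgraph: converts each op to BM
// IR via the bridge registry, compiles the result into a bmodel, loads it
// into the BM runtime, and binds device memory for all inputs and outputs.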
int SubgraphEngine::BuildDeviceProgram() {
  int status = 0;
  subgraph::bm::Graph graph;
  const auto& bridges = subgraph::Registry::Instance();
  graph.CreateCompilerHandle();
  auto& ctx = this->ctx_->template As<BMContext>();
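  // Convert each op in the subgraph through its registered BM bridge;
  // fail fast if an op has no bridge or its conversion reports an error.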
  for (auto& inst : origin_program_) {
    auto op = inst.op();
    CHECK(op);
    op->CheckShape();
    op->InferShape();
    std::string op_type = op->op_info()->Type();
    if (!bridges.Exists(op_type, "kBM")) {
      return subgraph::FAILED;
    }
    auto kernel = inst.kernel();
    status |= bridges.Select(op_type, "kBM")(reinterpret_cast<void*>(&graph),
                                             const_cast<OpLite*>(op),
                                             const_cast<KernelBase*>(kernel));
    if (subgraph::CHECK_FAILED(status)) {
      return subgraph::FAILED;
    }
  }

  std::string net_name = "paddle_bitmain";
  // Compile the converted graph into an in-memory bmodel.
  __bmcompile_opt(graph.GetCompilerHandle(),
                  const_cast<char*>(net_name.c_str()), 2);
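  // Retrieve the compiled bmodel blob and load it into the BM runtime.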
  void* bmodel_data = nullptr;
  unsigned int data_size = 0;
  bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
  finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
  bmrt_hd_ = bmrt_create(bm_hd_);
  if (!bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
    return subgraph::FAILED;
  }
  bmrt_get_network_names(bmrt_hd_, &net_names_);
  net_info_ = bmrt_get_network_info(bmrt_hd_, net_names_[0]);
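  // Use the first shape stage of the loaded network for tensor binding.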
  auto& stage = net_info_->stages[0];
  // Bind each subgraph input to a freshly allocated device buffer.
  origin_idims_.resize(input_names_.size());
  origin_itensors_.resize(input_names_.size());
  device_inputs_.resize(input_names_.size());
  for (size_t i = 0; i < input_names_.size(); i++) {
    origin_itensors_[i] = scope_->FindMutableTensor(input_names_[i]);
    CHECK(origin_itensors_[i]);
    origin_idims_[i] = origin_itensors_[i]->dims();
    // bmrt_tensor_with_device takes the memory descriptor by value, so a
    // stack-allocated bm_device_mem_t is sufficient here.
    bm_device_mem_t device_mem;
    CHECK_EQ(bm_malloc_device_byte(
                 bm_hd_, &device_mem, origin_itensors_[i]->memory_size()),
             BM_SUCCESS);
    bmrt_tensor_with_device(&device_inputs_[i],
                            device_mem,
                            net_info_->input_dtypes[i],
                            stage.input_shapes[i]);
  }
  // Bind each subgraph output to a device buffer. The runtime may report
  // outputs in a different order than output_names_, so output_map_ maps a
  // runtime output name back to its index in the subgraph's output list.
  origin_odims_.resize(output_names_.size());
  origin_otensors_.resize(output_names_.size());
  device_outputs_.resize(output_names_.size());
  for (size_t i = 0; i < output_names_.size(); i++) {
    origin_otensors_[i] = scope_->FindMutableTensor(output_names_[i]);
    CHECK(origin_otensors_[i]);
    origin_odims_[i] = origin_otensors_[i]->dims();
    output_map_.insert(std::pair<std::string, int>(output_names_[i], i));
    origin_otensors_[i]->mutable_data<float>();
  }
  for (size_t i = 0; i < output_names_.size(); i++) {
    int mapping_index = output_map_.at(net_info_->output_names[i]);
    bm_device_mem_t device_mem;
    CHECK_EQ(bm_malloc_device_byte(
                 bm_hd_,
                 &device_mem,
                 origin_otensors_[mapping_index]->memory_size()),
             BM_SUCCESS);
    bmrt_tensor_with_device(&device_outputs_[i],
                            device_mem,
                            net_info_->output_dtypes[i],
                            stage.output_shapes[i]);
  }

  return status;
}

int SubgraphEngine::LaunchDeviceProgram() {
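  // Copy each host input tensor into its device buffer.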
  for (size_t i = 0; i < device_inputs_.size(); i++) {
    bm_memcpy_s2d(bm_hd_,
                  device_inputs_[i].device_mem,
                  const_cast<void*>(origin_itensors_[i]->raw_data()));
  }
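  // Launch the network and block until execution completes.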
  bmrt_launch_tensor_ex(bmrt_hd_,
                        net_names_[0],
                        device_inputs_.data(),
                        net_info_->input_num,
                        device_outputs_.data(),
                        net_info_->output_num,
                        true,
                        false);
  bm_thread_sync(bm_hd_);
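  // Copy the device outputs back into the host tensors.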
  for (size_t i = 0; i < device_outputs_.size(); i++) {
    bm_memcpy_d2s(bm_hd_,
                  const_cast<void*>(origin_otensors_[i]->raw_data()),
                  device_outputs_[i].device_mem);
  }
  return 0;
}

void SubgraphCompute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  engine_.reset(new SubgraphEngine(ctx_.get(),
                                   param.sub_block_idx,
                                   param.sub_block_desc,
                                   param.input_data_names,
                                   param.output_data_names,
                                   param.scope));
  CHECK(engine_);
  engine_->Build();
}

void SubgraphCompute::Run() {
  CHECK(engine_);
  engine_->Launch();
}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(subgraph,
                     kBM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::bm::SubgraphCompute,
                     def)
    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();