Unverified commit 0348583f, authored by Santa An, committed by GitHub

[LITE][BM] fix input shape order changed issue,test=develop (#4407)

* [LITE][BM] support multiclass_nms2 and fix some issues, test=develop

* create

* [LITE][BM] fix input shape order changed issue,test=develop
Parent: ba92e951
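The substance of the fix: device inputs used to be paired with host tensors by the subgraph's own input order (the cached origin_itensors_ vector), but the compiled BM network reports its inputs through net_info_->input_names, which may list them in a different order. Both the build and launch paths now look each host tensor up by that name, so device input i always receives the matching data. Below is a minimal standalone sketch of the hazard, in plain C++ with stand-in names (scope, subgraph_order, net_order are illustrative, not BM SDK types):

    #include <cassert>
    #include <cstddef>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Host tensors live in a scope keyed by name, like exec_scope_.
      std::map<std::string, std::vector<float>> scope = {
          {"image", {1.f, 2.f}}, {"im_info", {3.f}}};

      // Order in which the subgraph collected its inputs (origin_itensors_ order).
      std::vector<std::string> subgraph_order = {"image", "im_info"};
      // Order the compiled net reports (net_info_->input_names); it can differ.
      std::vector<std::string> net_order = {"im_info", "image"};

      // Index-based pairing goes wrong as soon as the two orders diverge:
      assert(subgraph_order[0] != net_order[0]);

      // Name-based pairing stays correct regardless of compiler ordering.
      for (std::size_t i = 0; i < net_order.size(); ++i) {
        std::vector<float>& host = scope.at(net_order[i]);  // FindMutableTensor analogue
        (void)host;  // device input i would be filled from 'host' here
      }
      return 0;
    }

Keying by name makes the pairing immune to whatever input order the compiler chooses.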
@@ -23,7 +23,7 @@ int TargetWrapperBM::device_id_ = 0;
 std::map<int, void*> TargetWrapperBM::bm_hds_;
 size_t TargetWrapperBM::num_devices() {
-  int count = 0;
+  int count = 1;
   bm_status_t ret = bm_dev_getcount(&count);
   CHECK_EQ(ret, BM_SUCCESS) << "Failed with error code: "
                             << static_cast<int>(ret);
...
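The only change in this hunk is the initial value of count (0 to 1). bm_dev_getcount() overwrites it on success and the CHECK_EQ aborts on failure, so the new default presumably just guarantees a sane "at least one device" value if the variable is ever read without a successful query. A hedged sketch of that defensive-default pattern in plain C++ (query_count is a stub standing in for bm_dev_getcount):

    #include <iostream>

    // Stub standing in for bm_dev_getcount(); the real call queries the driver.
    static bool query_count(int* n) { *n = 4; return true; }

    int num_devices() {
      int count = 1;  // defensive default: assume at least one device
      if (!query_count(&count)) {
        // The real code treats failure as fatal via CHECK_EQ(ret, BM_SUCCESS).
        std::cerr << "device count query failed, keeping default of 1\n";
      }
      return count;
    }

    int main() { std::cout << num_devices() << " device(s)\n"; }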
@@ -66,9 +66,9 @@ bool SubgraphEngine::BuildDeviceProgram() {
       graph.GetCompilerHandle(), const_cast<char*>(unique_net_name.c_str()), 1);
   void* bmodel_data = nullptr;
   unsigned int data_size = 0;
-  bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
   finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
   graph.UnlockCompilerMutex();
+  bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
   bmrt_hd_ = bmrt_create(bm_hd_);
   if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
     return false;
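Here the bm_hd_ assignment moves from inside the compiler critical section (before finish_bmcompiler_data) to after graph.UnlockCompilerMutex(), immediately before its first use in bmrt_create. The visible effect is that the locked region now contains only compiler work. A minimal sketch of that shape in plain C++ (names are illustrative, not the Paddle-Lite API):

    #include <mutex>
    #include <vector>

    std::mutex compiler_mutex;  // analogue of the shared BM compiler mutex

    std::vector<char> CompileModel() {
      std::lock_guard<std::mutex> lock(compiler_mutex);
      // Emit the model blob while holding the lock (finish_bmcompiler_data analogue).
      return std::vector<char>{'b', 'm'};
    }

    void BuildDeviceProgram() {
      std::vector<char> bmodel = CompileModel();  // lock released on return
      void* device_handle = nullptr;  // bm_hd_ analogue: fetched only after unlock,
      (void)device_handle;            // right before runtime creation would use it
      (void)bmodel;
    }

    int main() { BuildDeviceProgram(); }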
@@ -79,15 +79,15 @@ bool SubgraphEngine::BuildDeviceProgram() {
   // input
   device_inputs_.resize(input_names_.size());
   for (size_t i = 0; i < input_names_.size(); i++) {
-    origin_itensors_[i] =
+    auto origin_itensor =
         exec_scope_->FindMutableTensor(net_info_->input_names[i]);
-    CHECK(origin_itensors_[i]);
+    CHECK(origin_itensor);
     bm_device_mem_t* p_mem =
         static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
     CHECK(p_mem != nullptr);
-    CHECK_EQ(bm_malloc_device_byte(
-                 bm_hd_, p_mem, origin_itensors_[i]->memory_size()),
-             BM_SUCCESS);
+    CHECK_EQ(
+        bm_malloc_device_byte(bm_hd_, p_mem, origin_itensor->memory_size()),
+        BM_SUCCESS);
     bmrt_tensor_with_device(&device_inputs_[i],
                             *p_mem,
                             net_info_->input_dtypes[i],
@@ -124,9 +124,11 @@ bool SubgraphEngine::LaunchDeviceProgram() {
 bool SubgraphEngine::LaunchDeviceProgram() {
   for (size_t i = 0; i < device_inputs_.size(); i++) {
+    auto origin_itensor =
+        exec_scope_->FindMutableTensor(net_info_->input_names[i]);
     bm_memcpy_s2d(bm_hd_,
                   device_inputs_[i].device_mem,
-                  const_cast<void*>(origin_itensors_[i]->raw_data()));
+                  const_cast<void*>(origin_itensor->raw_data()));
   }
   bmrt_launch_tensor_ex(bmrt_hd_,
                         net_names_[0],
...
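Note that LaunchDeviceProgram() now re-resolves each host tensor from the scope on every launch rather than reading through a pointer cached at build time; if the scope's tensor storage were reallocated between launches (after a resize, for instance), a cached pointer could go stale. A sketch of the re-lookup pattern in plain C++ (Scope and the names are stand-ins, not the real classes):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    using Scope = std::map<std::string, std::vector<float>>;

    // Launch-time analogue: resolve the tensor by the net's input name on
    // every call instead of dereferencing a pointer cached at build time.
    void Launch(Scope& scope, const std::vector<std::string>& net_input_names) {
      for (const std::string& name : net_input_names) {
        std::vector<float>& t = scope.at(name);  // fresh lookup each launch
        std::cout << name << ": copy " << t.size() << " floats to device\n";
      }
    }

    int main() {
      Scope scope = {{"image", std::vector<float>(8)}};
      Launch(scope, {"image"});
      scope["image"].resize(32);  // host storage changes between launches
      Launch(scope, {"image"});   // re-lookup still sees valid, current storage
    }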