PaddlePaddle / Paddle-Lite, commit 8278f114
Authored April 15, 2020 by yanghongtian
add subgraph compute and backends

Parent: 841061b1

Showing 22 changed files with 1385 additions and 28 deletions (+1385 −28)
cmake/lite.cmake                                        +10  −10
lite/api/CMakeLists.txt                                  +6   −7
lite/backends/CMakeLists.txt                             +1   −0
lite/backends/hw_ascend_npu/CMakeLists.txt              +14   −0
lite/backends/hw_ascend_npu/device.cc                   +58   −0
lite/backends/hw_ascend_npu/device.h                    +56   −0
lite/backends/hw_ascend_npu/runtime.cc                 +296   −0
lite/backends/hw_ascend_npu/runtime.h                  +109   −0
lite/backends/hw_ascend_npu/target_wrapper.cc           +50   −0
lite/backends/hw_ascend_npu/target_wrapper.h            +38   −0
lite/core/context.h                                      +1   −0
lite/gen_code/CMakeLists.txt                             +1   −1
lite/kernels/hw_ascend_npu/CMakeLists.txt                +3   −0
lite/kernels/hw_ascend_npu/bridges/CMakeLists.txt        +6   −2
lite/kernels/hw_ascend_npu/bridges/act_op.cc           +164   −0
lite/kernels/hw_ascend_npu/bridges/graph.cc              +6   −5
lite/kernels/hw_ascend_npu/bridges/graph.h               +4   −2
lite/kernels/hw_ascend_npu/bridges/utility.cc          +163   −0
lite/kernels/hw_ascend_npu/bridges/utility.h            +54   −0
lite/kernels/hw_ascend_npu/subgraph_compute.cc         +260   −0
lite/kernels/hw_ascend_npu/subgraph_compute.h           +84   −0
lite/kernels/npu/bridges/CMakeLists.txt                  +1   −1
cmake/lite.cmake:

@@ -153,12 +153,12 @@ function(lite_cc_library TARGET)
           FPGA_DEPS ${args_FPGA_DEPS}
           NPU_DEPS ${args_NPU_DEPS}
           XPU_DEPS ${args_XPU_DEPS}
-          HW_ASCEND_NPU_DEPS ${args_HW_ASCEND_NPU_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
-          MLU_DEPS ${args_MLU_DEPS}
-          )
+          # MLU_DEPS ${args_MLU_DEPS}
+          HW_ASCEND_NPU_DEPS ${args_HW_ASCEND_NPU_DEPS}
+          )
   if (args_SHARED OR ARGS_shared)
     cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} SHARED)

@@ -204,7 +204,7 @@ function(lite_cc_binary TARGET)
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
           CV_DEPS ${CV_DEPS}
-          MLU_DEPS ${args_MLU_DEPS}
+          # MLU_DEPS ${args_MLU_DEPS}
           )
   cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
   target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)

@@ -261,7 +261,7 @@ function(lite_cc_test TARGET)
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
           CV_DEPS ${args_CV_DEPS}
-          MLU_DEPS ${args_MLU_DEPS}
+          # MLU_DEPS ${args_MLU_DEPS}
           )
   _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
   # strip binary target to reduce size

@@ -443,8 +443,8 @@ function(add_kernel TARGET device level)
           NPU_DEPS ${args_NPU_DEPS}
           XPU_DEPS ${args_XPU_DEPS}
+          HW_ASCEND_NPU_DEPS ${args_HW_ASCEND_NPU_DEPS}
           BM_DEPS ${args_BM_DEPS}
-          MLU_DEPS ${args_MLU_DEPS}
-          BM_DEPS ${args_BM_DEPS}
+          # MLU_DEPS ${args_MLU_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}

@@ -463,7 +463,7 @@ endif()
 function(add_operator TARGET level)
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS MLU_DEPS PROFILE_DEPS
+  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
       LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
       ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

@@ -498,8 +498,8 @@ function(add_operator TARGET level)
           NPU_DEPS ${args_NPU_DEPS}
           XPU_DEPS ${args_XPU_DEPS}
+          HW_ASCEND_NPU_DEPS ${args_HW_ASCEND_NPU_DEPS}
           BM_DEPS ${args_BM_DEPS}
-          MLU_DEPS ${args_MLU_DEPS}
-          BM_DEPS ${args_BM_DEPS}
+          # MLU_DEPS ${args_MLU_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
lite/api/CMakeLists.txt:

@@ -68,7 +68,7 @@ if (WITH_TESTING)
       X86_DEPS ${x86_kernels}
       XPU_DEPS ${xpu_kernels}
       BM_DEPS ${bm_kernels}
-      HW_ASCENND_NPU_DEPS ${hw_ascend_npu_kernels}
+      HWAscendNPU_DEPS ${hw_ascend_npu_kernels}
       MLU_DEPS ${mlu_kernels})
 endif()
 if(LITE_WITH_FPGA)

@@ -110,7 +110,7 @@ if (NOT LITE_ON_TINY_PUBLISH)
       BM_DEPS ${bm_kernels}
       CL_DEPS ${opencl_kernels}
       FPGA_DEPS ${fpga_kernels}
-      HW_ASCEND_NPU_DEPS ${hw_ascend_npu_kernels})
+      HWAscendNPU_DEPS ${hw_ascend_npu_kernels})
 endif()
 # for light api

@@ -132,8 +132,7 @@ lite_cc_library(light_api SRCS light_api.cc
       CL_DEPS ${opencl_kernels}
       FPGA_DEPS ${fpga_kernels}
       BM_DEPS ${bm_kernels}
-      HW_ASCEND_NPU_DEPS ${hw_ascend_npu_kernels}
-      MLU_DEPS ${mlu_kernels})
+      HWAscendNPU_DEPS ${hw_ascend_npu_kernels})

 include(ExternalProject)
 set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING

@@ -152,8 +151,7 @@ if(WITH_TESTING)
       CL_DEPS ${opencl_kernels}
       FPGA_DEPS ${fpga_kernels}
       BM_DEPS ${bm_kernels}
-      HW_ASCEND_NPU_DEPS ${hw_ascend_npu_kernels}
-      MLU_DEPS ${mlu_kernels}
+      HWAscendNPU_DEPS ${hw_ascend_npu_kernels}
       EXCLUDE_COMPILE_DEPS "ON"
       ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
            --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)

@@ -301,6 +299,7 @@ lite_cc_test(test_apis SRCS apis_test.cc
       FPGA_DEPS ${fpga_kernels}
       BM_DEPS ${bm_kernels}
       MLU_DEPS ${mlu_kernels}
+      HWAscendNPU_DEPS ${hw_ascend_npu_kernels}
       ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
            --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)

@@ -338,8 +337,8 @@ lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle
       X86_DEPS ${x86_kernels}
       FPGA_DEPS ${fpga_kernels}
       BM_DEPS ${bm_kernels}
-      HW_ASCEND_NPU_DEPS ${hw_ascend_npu_kernels}
       MLU_DEPS ${mlu_kernels}
+      HWAscendNPU_DEPS ${hw_ascend_npu_kernels}
       ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
 if (WITH_TESTING)
   add_dependencies(test_paddle_api extern_lite_download_lite_naive_model_tar_gz)
lite/backends/CMakeLists.txt:

@@ -8,3 +8,4 @@ add_subdirectory(npu)
 add_subdirectory(xpu)
 add_subdirectory(mlu)
 add_subdirectory(bm)
+add_subdirectory(hw_ascend_npu)
lite/backends/hw_ascend_npu/CMakeLists.txt (new file, 0 → 100644):

if (NOT LITE_WITH_HW_ASCEND_NPU)
  return()
endif()

lite_cc_library(runtime_hw_ascend_npu SRCS runtime.cc
    DEPS ${hw_ascend_npu_builder_libs} ${hw_ascend_npu_runtime_libs})
lite_cc_library(target_wrapper_hw_ascend_npu SRCS target_wrapper.cc
    DEPS ${hw_ascend_npu_builder_libs} ${hw_ascend_npu_runtime_libs})
lite_cc_library(device_hw_ascend_npu SRCS device.cc
    DEPS ${hw_ascend_npu_builder_libs} ${hw_ascend_npu_runtime_libs}
         target_wrapper_hw_ascend_npu runtime_hw_ascend_npu)
lite/backends/hw_ascend_npu/device.cc (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/backends/hw_ascend_npu/device.h"
#include <map>
#include <string>
#include "ge/ge_api_types.h"
#include "lite/backends/hw_ascend_npu/runtime.h"
#include "lite/utils/cp_logging.h"

namespace paddle {
namespace lite {
namespace hw_ascend_npu {

std::shared_ptr<HWAscendNPURuntime> Device::Build(
    std::vector<ge::Operator>& input_nodes,   // NOLINT
    std::vector<ge::Operator>& output_nodes   // NOLINT
    ) {
  VLOG(3) << "[HWAscendNPU] Build model";
  // Build the IR graph to the om model
  ge::Graph ir_graph("graph");
  ir_graph.SetInputs(input_nodes).SetOutputs(output_nodes);
  ge::ModelBufferData model;
  std::map<std::string, std::string> build_options;
  build_options.insert({ge::ir_option::EXEC_DISABLE_REUSED_MEMORY, "1"});
  ge::graphStatus ret = aclgrphBuildModel(ir_graph, build_options, model);
  if (ret != ge::GRAPH_SUCCESS) {
    LOG(ERROR) << "[HWAscendNPU] Build model failed, error code: " << ret;
    return nullptr;
  }
  std::shared_ptr<HWAscendNPURuntime> model_runtime(
      new HWAscendNPURuntime(model.data, model.length));
  CHECK(model_runtime != nullptr);
  if (!model_runtime->model_loaded()) {
    LOG(ERROR) << "[HWAscendNPU]: Can not create model runtime instance";
    return nullptr;
  }
  VLOG(3) << "[HWAscendNPU]: Build done";
  return model_runtime;
}

}  // namespace hw_ascend_npu
}  // namespace lite
}  // namespace paddle
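For orientation: once the bridges have assembled a GE graph, the new Device/runtime pair is driven roughly as follows. This is a minimal sketch with a hypothetical run_once() helper, not code from this commit:

// Minimal usage sketch, assuming a GE graph has already been assembled by
// the bridges. run_once() is a hypothetical helper, not part of this commit.
#include <vector>
#include "lite/backends/hw_ascend_npu/device.h"

using paddle::lite::hw_ascend_npu::Device;

int run_once(std::vector<ge::Operator>& inputs,    // NOLINT
             std::vector<ge::Operator>& outputs,   // NOLINT
             const std::vector<paddle::lite::Tensor*>& itensors,
             const std::vector<paddle::lite::DDim>& idims,
             const std::vector<paddle::lite::Tensor*>* otensors) {
  // Build the IR graph into an om model, wrapped in a runtime instance.
  auto runtime = Device::Global().Build(inputs, outputs);
  if (runtime == nullptr) return -1;
  // Stage host inputs into device buffers, execute, then read back outputs.
  if (runtime->SetInput(itensors, idims) != 0) return -1;
  if (runtime->Process() != 0) return -1;
  runtime->GetOutput(otensors);
  return 0;
}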
lite/backends/hw_ascend_npu/device.h (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ge/ge_ir_build.h"  // NOLINT
#include "lite/backends/hw_ascend_npu/runtime.h"

namespace paddle {
namespace lite {
namespace hw_ascend_npu {

class Device {
 public:
  static Device& Global() {
    static Device x;
    return x;
  }
  Device() {}

  int freq_level() { return freq_level_; }
  int framework_type() { return framework_type_; }
  int model_type() { return model_type_; }
  int device_type() { return device_type_; }

  // Build the IR graph to om model, return a HWAscendNPURuntime instance to
  // load om model and run inference.
  std::shared_ptr<HWAscendNPURuntime> Build(
      std::vector<ge::Operator>& input_nodes,   // NOLINT
      std::vector<ge::Operator>& output_nodes   // NOLINT
      );

 private:
  int freq_level_{3};
  int framework_type_{0};
  int model_type_{0};
  int device_type_{0};
};

}  // namespace hw_ascend_npu
}  // namespace lite
}  // namespace paddle
lite/backends/hw_ascend_npu/runtime.cc (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#include "lite/backends/hw_ascend_npu/runtime.h"
#include "lite/backends/hw_ascend_npu/target_wrapper.h"
#include "lite/utils/cp_logging.h"

namespace paddle {
namespace lite {
namespace hw_ascend_npu {

HWAscendNPURuntime::HWAscendNPURuntime(
    std::shared_ptr<uint8_t> model_buff_built, size_t model_buff_size) {
  model_loaded_ = (0 == LoadModelFromMem(model_buff_built, model_buff_size));
}

HWAscendNPURuntime::~HWAscendNPURuntime() {
  UnloadModel();
  DestroyDesc();
  DestroyInput();
  DestroyOutput();
}

int HWAscendNPURuntime::LoadModelFromMem(
    std::shared_ptr<uint8_t> model_buff_built, size_t model_buff_size) {
  if (model_loaded_) {
    LOG(ERROR) << "[HWAscendNPU]: Has already loaded a model";
    return 0;
  }
  aclError ret = aclmdlQuerySizeFromMem(model_buff_built.get(),
                                        model_buff_size,
                                        &model_size_,
                                        &model_weights_size_);
  if (ret != ACL_ERROR_NONE) {
    LOG(ERROR) << "[HWAscendNPU]: Can not query size from the built model "
                  "buffer, error code: " << ret;
    return ret;
  }
  ret = aclrtMalloc(&model_ptr_, model_size_, ACL_MEM_MALLOC_NORMAL_ONLY);
  if (ret != ACL_ERROR_NONE) {
    LOG(ERROR) << "[HWAscendNPU]: Can not allocate a device memory for model, "
                  "error code: " << ret;
    return ret;
  }
  ret = aclrtMalloc(
      &model_weights_ptr_, model_weights_size_, ACL_MEM_MALLOC_NORMAL_ONLY);
  if (ret != ACL_ERROR_NONE) {
    LOG(ERROR) << "[HWAscendNPU]: Can not allocate a device memory for model "
                  "weights, error code: " << ret;
    return ret;
  }
  ret = aclmdlLoadFromMemWithMem(model_buff_built.get(),
                                 model_buff_size,
                                 &model_id_,
                                 model_ptr_,
                                 model_size_,
                                 model_weights_ptr_,
                                 model_weights_size_);
  if (ret != ACL_ERROR_NONE) {
    LOG(ERROR) << "[HWAscendNPU]: Can not load model from memory, error code: "
               << ret;
    return ret;
  }
  model_desc_ = aclmdlCreateDesc();
  if (model_desc_ == nullptr) {
    LOG(ERROR) << "[HWAscendNPU]: Can not create model descriptor.";
    return ACL_ERROR_FAILURE;
  }
  ret = aclmdlGetDesc(model_desc_, model_id_);
  if (ret != ACL_ERROR_NONE) {
    LOG(ERROR) << "[HWAscendNPU]: Can not get model descriptor from model, "
                  "error code: " << ret;
    return ret;
  }
  return ret;
}

int HWAscendNPURuntime::CreateInput(const std::vector<DDim>& idims) {
  if (itensors_ != nullptr) {
    DestroyInput();
  }
  itensors_ = aclmdlCreateDataset();
  if (itensors_ == nullptr) {
    LOG(ERROR) << "[HWAscendNPU]: Can not create input dataset";
    return ACL_ERROR_FAILURE;
  }
  for (auto& dim : idims) {
    void* buff_dev_ptr = nullptr;
    CHECK(ACL_ERROR_NONE == aclrtMalloc(&buff_dev_ptr,
                                        dim.production(),
                                        ACL_MEM_MALLOC_NORMAL_ONLY));
    aclDataBuffer* input_data_buffer =
        aclCreateDataBuffer(buff_dev_ptr, dim.production());
    CHECK(input_data_buffer != nullptr);
    CHECK(ACL_ERROR_NONE ==
          aclmdlAddDatasetBuffer(itensors_, input_data_buffer));
  }
  return 0;
}

int HWAscendNPURuntime::CreateOutput(const std::vector<DDim>& odims) {
  if (otensors_ != nullptr) {
    DestroyOutput();
  }
  otensors_ = aclmdlCreateDataset();
  if (otensors_ == nullptr) {
    LOG(ERROR) << "[HWAscendNPU]: Can not create output dataset";
    return ACL_ERROR_FAILURE;
  }
  for (auto& dim : odims) {
    void* buff_dev_ptr = nullptr;
    CHECK(ACL_ERROR_NONE == aclrtMalloc(&buff_dev_ptr,
                                        dim.production(),
                                        ACL_MEM_MALLOC_NORMAL_ONLY));
    aclDataBuffer* output_data_buffer =
        aclCreateDataBuffer(buff_dev_ptr, dim.production());
    CHECK(output_data_buffer != nullptr);
    CHECK(ACL_ERROR_NONE ==
          aclmdlAddDatasetBuffer(otensors_, output_data_buffer));
  }
  return 0;
}

void HWAscendNPURuntime::UnloadModel() {
  if (!model_loaded_) {
    LOG(ERROR) << "[HWAscendNPU]: No model has been loaded";
    return;
  }
  aclError ret = ACL_ERROR_NONE;
  ret = aclmdlUnload(model_id_);
  if (ret != ACL_ERROR_NONE) {
    LOG(ERROR) << "[HWAscendNPU]: Unload model failed, error code: " << ret;
    return;
  }
  if (model_ptr_) {
    aclrtFree(model_ptr_);
    model_ptr_ = nullptr;
  }
  if (model_weights_ptr_) {
    aclrtFree(model_weights_ptr_);
    model_weights_ptr_ = nullptr;
  }
  model_loaded_ = false;
}

void HWAscendNPURuntime::DestroyDesc() {
  if (model_desc_) {
    (void)aclmdlDestroyDesc(model_desc_);
    model_desc_ = nullptr;
  }
}

void HWAscendNPURuntime::DestroyInput() {
  if (itensors_ == nullptr) {
    return;
  }
  size_t buf_num = aclmdlGetDatasetNumBuffers(itensors_);
  for (size_t i = 0; i < buf_num; ++i) {
    aclDataBuffer* data_buffer = aclmdlGetDatasetBuffer(itensors_, i);
    aclDestroyDataBuffer(data_buffer);
  }
  aclmdlDestroyDataset(itensors_);
  itensors_ = nullptr;
}

void HWAscendNPURuntime::DestroyOutput() {
  if (otensors_ == nullptr) {
    return;
  }
  size_t buf_num = aclmdlGetDatasetNumBuffers(otensors_);
  for (size_t i = 0; i < buf_num; ++i) {
    aclDataBuffer* data_buffer = aclmdlGetDatasetBuffer(otensors_, i);
    aclDestroyDataBuffer(data_buffer);
  }
  aclmdlDestroyDataset(otensors_);
  otensors_ = nullptr;
}

int HWAscendNPURuntime::SetInput(const std::vector<Tensor*>& itensors,
                                 const std::vector<DDim>& idims) {
  CHECK(itensors.size() == idims.size());
  size_t input_tensor_num = itensors.size();
  for (size_t i = 0; i < input_tensor_num; ++i) {
    CHECK(itensors[i]->memory_size() == idims[i].production());
  }
  size_t num_buffers_in_dataset = aclmdlGetDatasetNumBuffers(itensors_);
  if (num_buffers_in_dataset != input_tensor_num) {
    if (0 != CreateInput(idims)) {
      return -1;
    }
  } else {
    bool need_to_create_input = false;
    for (size_t i = 0; i < num_buffers_in_dataset; ++i) {
      aclDataBuffer* data_buffer = aclmdlGetDatasetBuffer(itensors_, i);
      int64_t buf_size = aclGetDataBufferSize(data_buffer);
      if (buf_size != idims[i].production()) {
        need_to_create_input = true;
      }
    }
    if (need_to_create_input && 0 != CreateInput(idims)) {
      return -1;
    }
  }
  // copy input data from host to device
  for (size_t i = 0; i < input_tensor_num; ++i) {
    aclDataBuffer* data_buffer = aclmdlGetDatasetBuffer(itensors_, i);
    void* buf_dev_ptr = aclGetDataBufferAddr(data_buffer);
    TargetWrapperHWAscendNPU::MemcpySync(buf_dev_ptr,
                                         itensors[i]->raw_data(),
                                         itensors[i]->memory_size(),
                                         IoDirection::HtoD);
  }
  return 0;
}

void HWAscendNPURuntime::GetOutput(const std::vector<Tensor*>* otensors_ptr) {
  CHECK(otensors_ptr != nullptr);
  size_t num_output = aclmdlGetDatasetNumBuffers(otensors_);
  const std::vector<Tensor*> otensors = *otensors_ptr;
  CHECK(num_output == otensors.size());
  for (size_t i = 0; i < num_output; ++i) {
    aclDataBuffer* data_buffer = aclmdlGetDatasetBuffer(otensors_, i);
    TargetWrapperHWAscendNPU::MemcpySync(otensors[i]->raw_data(),
                                         aclGetDataBufferAddr(data_buffer),
                                         aclGetDataBufferSize(data_buffer),
                                         IoDirection::DtoH);
  }
}

int HWAscendNPURuntime::Process() {
  aclError ret = aclmdlExecute(model_id_, itensors_, otensors_);
  if (ret != ACL_ERROR_NONE) {
    LOG(ERROR) << "[HWAscendNPU]: Execute model failed, model_id: "
               << model_id_ << ", error code: " << ret;
  }
  return ret;
}

int HWAscendNPURuntime::GetModelIOTensorDim(std::vector<TensorDesc>* idims,
                                            std::vector<TensorDesc>* odims) {
  aclError ret = ACL_ERROR_NONE;
  size_t num_inputs = aclmdlGetNumInputs(model_desc_);
  size_t num_outputs = aclmdlGetNumOutputs(model_desc_);
  for (size_t i = 0; i < num_inputs; ++i) {
    aclmdlIODims dims;
    if (ret != aclmdlGetInputDims(model_desc_, i, &dims)) {
      LOG(ERROR) << "[HWAscendNPU]: Get input dims failed, index: " << i;
      return ret;
    }
    aclDataType data_type = aclmdlGetInputDataType(model_desc_, i);
    aclFormat format = aclmdlGetInputFormat(model_desc_, i);
    idims->push_back(TensorDesc(data_type, dims, format));
  }
  for (size_t i = 0; i < num_outputs; ++i) {
    aclmdlIODims dims;
    if (ret != aclmdlGetOutputDims(model_desc_, i, &dims)) {
      LOG(ERROR) << "[HWAscendNPU]: Get output dims failed, index: " << i;
      return ret;
    }
    aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
    aclFormat format = aclmdlGetOutputFormat(model_desc_, i);
    odims->push_back(TensorDesc(data_type, dims, format));
  }
  return 0;
}

}  // namespace hw_ascend_npu
}  // namespace lite
}  // namespace paddle
lite/backends/hw_ascend_npu/runtime.h (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#pragma once

// hw_ascend_npu runtime library
#include <acl/acl.h>
#include <acl/tensor.h>
#include <memory>
#include <vector>
#include "lite/core/tensor.h"
#include "lite/utils/cp_logging.h"

namespace paddle {
namespace lite {
namespace hw_ascend_npu {

class TensorDesc {
 public:
  TensorDesc(aclDataType data_type, aclmdlIODims dims, aclFormat format) {
    tensor_desc_ =
        aclCreateTensorDesc(data_type, dims.dimCount, dims.dims, format);
    CHECK(tensor_desc_ != nullptr);
    aclSetTensorDescName(tensor_desc_, dims.name);
    if (format == ACL_FORMAT_NHWC) {
      dim_order[1] = 3;
      dim_order[2] = 1;
      dim_order[3] = 2;
    }
  }
  ~TensorDesc() {
    if (tensor_desc_ != nullptr) {
      aclDestroyTensorDesc(tensor_desc_);
      tensor_desc_ = nullptr;
    }
  }
  uint32_t GetNumber() const {
    return static_cast<uint32_t>(
        aclGetTensorDescDim(tensor_desc_, dim_order[0]));
  }
  uint32_t GetChannel() const {
    return static_cast<uint32_t>(
        aclGetTensorDescDim(tensor_desc_, dim_order[1]));
  }
  uint32_t GetHeight() const {
    return static_cast<uint32_t>(
        aclGetTensorDescDim(tensor_desc_, dim_order[2]));
  }
  uint32_t GetWidth() const {
    return static_cast<uint32_t>(
        aclGetTensorDescDim(tensor_desc_, dim_order[3]));
  }
  const aclTensorDesc& GetTensorDesc() const { return *tensor_desc_; }

 private:
  aclTensorDesc* tensor_desc_{nullptr};
  // n c h w order, default to ACL_FORMAT_NCHW
  std::vector<uint32_t> dim_order{0, 1, 2, 3};
};

class HWAscendNPURuntime {
 public:
  HWAscendNPURuntime(std::shared_ptr<uint8_t> model_buff_built,
                     size_t model_buff_size);
  ~HWAscendNPURuntime();

  int SetInput(const std::vector<Tensor*>& itensors,
               const std::vector<DDim>& idims);
  void GetOutput(const std::vector<Tensor*>* otensors_ptr);
  int Process();
  bool model_loaded() const { return model_loaded_; }

  int CreateInput(const std::vector<DDim>& idims);
  int CreateOutput(const std::vector<DDim>& odims);
  int GetModelIOTensorDim(std::vector<TensorDesc>* idims,
                          std::vector<TensorDesc>* odims);

 private:
  int LoadModelFromMem(std::shared_ptr<uint8_t> model_buff_built,
                       size_t model_buff_size);
  void UnloadModel();
  void DestroyDesc();
  void DestroyInput();
  void DestroyOutput();

 private:
  aclmdlDataset* itensors_{nullptr};
  aclmdlDataset* otensors_{nullptr};
  uint32_t model_id_{0};
  void* model_ptr_{nullptr};
  void* model_weights_ptr_{nullptr};
  size_t model_size_{0};
  size_t model_weights_size_{0};
  bool model_loaded_{false};
  // initialized so DestroyDesc() is safe if loading fails early
  aclmdlDesc* model_desc_{nullptr};
};

}  // namespace hw_ascend_npu
}  // namespace lite
}  // namespace paddle
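The dim_order member above remaps the logical NCHW accessors onto NHWC raw dimensions. A standalone model of that remapping (hypothetical helper, not part of the commit):

#include <array>
#include <cassert>
#include <cstdint>

// Standalone model of TensorDesc's remapping: logical (N, C, H, W) axes
// are read from raw dim positions {0, 3, 1, 2} when the data is NHWC.
int64_t logical_dim_nhwc(const std::array<int64_t, 4>& raw, int logical_axis) {
  static const int dim_order[4] = {0, 3, 1, 2};  // N, C, H, W -> raw position
  return raw[dim_order[logical_axis]];
}

void nhwc_remap_example() {
  std::array<int64_t, 4> raw = {1, 224, 224, 3};  // NHWC layout on device
  assert(logical_dim_nhwc(raw, 1) == 3);    // channel comes from raw dim 3
  assert(logical_dim_nhwc(raw, 2) == 224);  // height comes from raw dim 1
}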
lite/backends/hw_ascend_npu/target_wrapper.cc (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#include "lite/backends/hw_ascend_npu/target_wrapper.h"
#include <acl/acl.h>
#include <glog/logging.h>

namespace paddle {
namespace lite {

void* TargetWrapperHWAscendNPU::Malloc(size_t size) {
  void* ptr{nullptr};
  if (ACL_ERROR_NONE != aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_NORMAL_ONLY)) {
    LOG(ERROR) << "[HWAscendNPU]: Allocate memory from device failed";
    ptr = nullptr;
  }
  return ptr;
}

void TargetWrapperHWAscendNPU::Free(void* ptr) { aclrtFree(ptr); }

void TargetWrapperHWAscendNPU::MemcpySync(void* dst,
                                          const void* src,
                                          size_t size,
                                          IoDirection dir) {
  switch (dir) {
    case IoDirection::HtoD:
      aclrtMemcpy(dst, size, src, size, ACL_MEMCPY_HOST_TO_DEVICE);
      break;
    case IoDirection::DtoH:
      aclrtMemcpy(dst, size, src, size, ACL_MEMCPY_DEVICE_TO_HOST);
      break;
    default:
      LOG(FATAL) << "Unsupported IoDirection " << static_cast<int>(dir);
  }
}

}  // namespace lite
}  // namespace paddle
lite/backends/hw_ascend_npu/target_wrapper.h (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#pragma once
#include "lite/core/target_wrapper.h"

namespace paddle {
namespace lite {

template <>
class TargetWrapper<TARGET(kHWAscendNPU)> {
 public:
  static size_t num_devices() { return 4; }
  static size_t maximum_stream() { return 0; }

  static void* Malloc(size_t size);
  static void Free(void* ptr);

  static void MemcpySync(void* dst,
                         const void* src,
                         size_t size,
                         IoDirection dir);
};

using TargetWrapperHWAscendNPU = TargetWrapper<TARGET(kHWAscendNPU)>;

}  // namespace lite
}  // namespace paddle
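A round-trip sketch of the wrapper's host/device copy API (hypothetical example; it assumes the ACL runtime was initialized elsewhere, e.g. via aclInit/aclrtSetDevice):

#include <vector>
#include "lite/backends/hw_ascend_npu/target_wrapper.h"

void roundtrip_example() {
  using paddle::lite::TargetWrapperHWAscendNPU;
  using paddle::lite::IoDirection;
  std::vector<float> host_src(1024, 1.0f), host_dst(1024, 0.0f);
  size_t bytes = host_src.size() * sizeof(float);
  // Allocate a device buffer, copy host -> device -> host, then release it.
  void* dev = TargetWrapperHWAscendNPU::Malloc(bytes);
  if (dev == nullptr) return;
  TargetWrapperHWAscendNPU::MemcpySync(
      dev, host_src.data(), bytes, IoDirection::HtoD);
  TargetWrapperHWAscendNPU::MemcpySync(
      host_dst.data(), dev, bytes, IoDirection::DtoH);
  TargetWrapperHWAscendNPU::Free(dev);
}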
lite/core/context.h:

@@ -535,6 +535,7 @@ class ContextScheduler {
         .As<HWAscendNPUContext>()
         .CopySharedTo(&ctx->As<HWAscendNPUContext>());
       break;
 #endif
 #ifdef LITE_WITH_MLU
     case TARGET(kMLU): {
       int dev_id = TargetWrapper<TargetType::kMLU>::GetCurDevice();
lite/gen_code/CMakeLists.txt:

@@ -17,7 +17,7 @@ lite_cc_test(test_gen_code SRCS gen_code_test.cc
       NPU_DEPS ${npu_kernels}
       XPU_DEPS ${xpu_kernels}
       CL_DEPS ${opencl_kernels}
-      HW_ASCEND_NPU_DEPS ${hw_ascend_npu_kernels}
+      HWAscendNPU_DEPS ${hw_ascend_npu_kernels}
       FPGA_DEPS ${fpga_kernels}
       EXCLUDE_COMPILE_DEPS "ON"
       ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite/kernels/hw_ascend_npu/CMakeLists.txt:

add_subdirectory(bridges)

add_kernel(subgraph_compute_hw_ascend_npu HWAscendNPU basic SRCS subgraph_compute.cc
    DEPS ${lite_kernel_deps} device_hw_ascend_npu subgraph_bridge_engine
         ${hw_ascend_npu_subgraph_bridges})
lite/kernels/hw_ascend_npu/bridges/CMakeLists.txt:

@@ -2,11 +2,15 @@ if (NOT LITE_WITH_HW_ASCEND_NPU)
   return()
 endif()

-message(STATUS "======compile hw_ascend_npu bridges, ${ascend_builder_libs}")
+message(STATUS "======compile hw_ascend_npu bridges, ${hw_ascend_npu_builder_libs}")

+lite_cc_library(subgraph_bridge_utility_hw_ascend_npu
+    SRCS utility.cc
+    DEPS ${hw_ascend_npu_builder_libs} tensor)
 lite_cc_library(subgraph_bridge_graph_hw_ascend_npu
     SRCS graph.cc
-    DEPS ${ascend_builder_libs})
+    DEPS ${hw_ascend_npu_builder_libs} subgraph_bridge_utility_hw_ascend_npu)

 set(hw_ascend_npu_subgraph_bridges
     subgraph_bridge_graph_hw_ascend_npu
lite/kernels/hw_ascend_npu/bridges/act_op.cc (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#include "lite/kernels/hw_ascend_npu/bridges/graph.h"
#include "lite/kernels/hw_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"

namespace paddle {
namespace lite {
namespace subgraph {
namespace hw_ascend_npu {

template <typename ActType>
int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[HWAscendNPU] Converting " + op_type + "...";

  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindTensor(x_name);
  auto out_name = op_info->Output("Out").front();

  // X node
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x);
  }

  // Act node
  auto act_node = graph->template Add<ActType>(out_name);
  auto act_op = act_node->template data<ActType>();
  act_op->set_input_x(*x_node->data());
  return SUCCESS;
}

template <>
int ActConverter<ge::op::Activation>(void* ctx,
                                     OpLite* op,
                                     KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[HWAscendNPU] Converting " + op_type + "...";

  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();

  // X node
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x);
  }

  // Act node
  auto act_node = graph->template Add<ge::op::Activation>(out_name);
  auto act_op = act_node->template data<ge::op::Activation>();
  act_op->set_input_x(*x_node->data());
  // TODO(hong19860320) set the coef value for act Ops, such as leaky_relu,
  // clipped_relu etc.
  act_op->set_attr_mode(CvtActMode(op_type));
  if (op_type == "relu_clipped") {
    auto Relu_clipped_coef = op_info->GetAttr<float>("Relu_clipped_coef");
    act_op->set_attr_coef(Relu_clipped_coef);
  } else if (op_type == "relu6") {
    float Relu_clipped_coef = 6.f;
    act_op->set_attr_coef(Relu_clipped_coef);
  } else if (op_type == "leaky_relu") {
    auto alpha = op_info->GetAttr<float>("alpha");
    act_op->set_attr_negative_slope(alpha);
  } else if (op_type == "hard_sigmoid") {
    auto slope = op_info->GetAttr<float>("slope");
    auto offset = op_info->GetAttr<float>("offset");
    act_op->set_attr_negative_slope(slope);
    act_op->set_attr_coef(offset);
  }
  return SUCCESS;
}

}  // namespace hw_ascend_npu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle

// All bridges below register against the kHWAscendNPU target; the few entries
// that originally referenced kNPU / the npu namespace have been normalized to
// match the rest of the file.
REGISTER_SUBGRAPH_BRIDGE(
    sigmoid,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    relu,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    tanh,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    relu_clipped,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    relu6,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    leaky_relu,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    abs,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    softsign,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    softplus,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    hard_sigmoid,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
    log,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Log>);
REGISTER_SUBGRAPH_BRIDGE(
    square,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Square>);
REGISTER_SUBGRAPH_BRIDGE(
    sqrt,
    kHWAscendNPU,
    paddle::lite::subgraph::hw_ascend_npu::ActConverter<ge::Sqrt>);
lite/kernels/hw_ascend_npu/bridges/graph.cc:

@@ -14,8 +14,8 @@
 #include "lite/kernels/hw_ascend_npu/bridges/graph.h"
 /// reference from opp package
 #include <all_ops.h>
 #include <utility>
 #include "lite/kernels/hw_ascend_npu/utility.h"

 namespace paddle {
 namespace lite {

@@ -49,8 +49,9 @@ std::shared_ptr<Node> Graph::Add(const std::string& name,
   PrecisionType precision = tensor.precision();
   if (tensor.persistable()) {
     // Const node
-    node = Add<ge::Const>(name, precision, layout);
-    node->data<ge::Const>()->set_attr_value(CvtTensor(tensor, shape, layout));
+    node = Add<ge::op::Const>(name, precision, layout);
+    node->data<ge::op::Const>()->set_attr_value(
+        CvtTensor(tensor, shape, layout));
   } else {
     // Data node
     node = Add(name, shape, precision, layout);

@@ -63,10 +64,10 @@ std::shared_ptr<Node> Graph::Add(const std::string& name,
                                  std::vector<int64_t> shape,
                                  PrecisionType precision,
                                  DataLayoutType layout) {
-  auto node = Add<ge::Data>(name, precision, layout);
+  auto node = Add<ge::op::Data>(name, precision, layout);
   ge::TensorDesc desc(
       ge::Shape(shape), CvtDataLayoutType(layout), CvtPrecisionType(precision));
-  node->data<ge::Data>()->update_input_desc_x(desc);
+  node->data<ge::op::Data>()->update_input_desc_data(desc);
   return node;
 }
lite/kernels/hw_ascend_npu/bridges/graph.h:

@@ -14,13 +14,15 @@
 #pragma once

+// reference from atc package
+#include <all_ops.h>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>
 // reference from atc package
-#include "graph/operators.h"
+#include "graph/operator.h"
+#include "graph/operator_reg.h"
 #include "lite/core/op_lite.h"
 #include "lite/core/tensor.h"
lite/kernels/hw_ascend_npu/bridges/utility.cc (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#include "lite/kernels/hw_ascend_npu/utility.h"
#include <algorithm>
#include <utility>

namespace paddle {
namespace lite {
namespace subgraph {
namespace hw_ascend_npu {

bool HasInputArg(const OpInfo* op_info,
                 const Scope* scope,
                 const std::string& argname) {
  auto iarg_names = op_info->input_argnames();
  if (std::find(iarg_names.begin(), iarg_names.end(), argname) !=
      iarg_names.end()) {
    auto inputs = op_info->Input(argname);
    if (inputs.empty()) {
      return false;
    }
    auto var_name = inputs.front();
    auto var = scope->FindVar(var_name);
    return var != nullptr;
  } else {
    return false;
  }
}

ge::DataType CvtPrecisionType(PrecisionType itype) {
  ge::DataType otype = ge::DT_FLOAT;
  switch (itype) {
    case PRECISION(kFloat):
      otype = ge::DT_FLOAT;
      break;
    case PRECISION(kInt8):
      otype = ge::DT_INT8;
      break;
    case PRECISION(kInt32):
      otype = ge::DT_INT32;
      break;
    case PRECISION(kFP16):
      otype = ge::DT_FLOAT16;
      break;
    case PRECISION(kBool):
      otype = ge::DT_BOOL;
      break;
    case PRECISION(kInt64):
      otype = ge::DT_INT64;
      break;
    case PRECISION(kInt16):
      otype = ge::DT_INT16;
      break;
    default:
      LOG(FATAL) << "[HW_ASCEND_NPU] Can not convert precision type("
                 << PrecisionToStr(itype) << ") from Lite to HW_ASCEND_NPU";
      break;
  }
  return otype;
}

ge::Format CvtDataLayoutType(DataLayoutType itype) {
  ge::Format otype = ge::FORMAT_NCHW;
  switch (itype) {
    case DATALAYOUT(kNCHW):
      otype = ge::FORMAT_NCHW;
      break;
    case DATALAYOUT(kNHWC):
      otype = ge::FORMAT_NHWC;
      break;
    // TODO(yanghongtian): support more data layout type
    default:
      LOG(FATAL) << "[HW_ASCEND_NPU] Can not convert data layout type("
                 << DataLayoutToStr(itype) << ") from Lite to HW_ASCEND_NPU";
      break;
  }
  return otype;
}

std::vector<int64_t> CvtShape(const std::vector<int64_t>& in_shape) {
  CHECK(in_shape.size() <= 4 && in_shape.size() > 0)
      << "[HW_ASCEND_NPU] The size of in_shape is invalid: "
      << in_shape.size();
  // Padding the shape to 4-dimensions(NCHW)
  std::vector<int64_t> out_shape(4, 1);
  std::copy(in_shape.begin(),
            in_shape.end(),
            out_shape.begin() + 4 - in_shape.size());
  return out_shape;
}

std::vector<int64_t> CvtShape(const DDim& in_dims) {
  return CvtShape(in_dims.Vectorize());
}

ge::Tensor CvtTensor(const Tensor& in_tensor,
                     std::vector<int64_t> out_shape,
                     DataLayoutType in_layout) {
  PrecisionType in_precision = in_tensor.precision();
  auto in_size = in_tensor.dims().production();
  auto in_shape = in_tensor.dims().Vectorize();
  if (out_shape.empty()) {
    out_shape = in_shape;
  }
  ge::TensorDesc out_desc(ge::Shape(out_shape),
                          CvtDataLayoutType(in_layout),
                          CvtPrecisionType(in_precision));
  auto out_size = out_desc.GetShape().GetShapeSize();
  CHECK_EQ(out_size, in_size);
  ge::Tensor out_tensor;
  out_tensor.SetTensorDesc(out_desc);
  out_tensor.SetData(reinterpret_cast<const uint8_t*>(in_tensor.raw_data()),
                     in_tensor.memory_size());
  return std::move(out_tensor);
}

int CvtActMode(const std::string& act_type) {
  // based on the nonlinear_fuc_ops.h in OPP (line 210)
  // default to Relu
  int act_mode = 1;
  if (act_type == "sigmoid") {
    act_mode = 0;
  } else if (act_type == "relu") {
    act_mode = 1;
  } else if (act_type == "tanh") {
    act_mode = 2;
  } else if (act_type == "relu_clipped" || act_type == "relu6") {
    act_mode = 3;
  } else if (act_type == "elu") {
    act_mode = 4;
  } else if (act_type == "leaky_relu") {
    act_mode = 5;
  } else if (act_type == "abs") {
    act_mode = 6;
  } else if (act_type == "relu1") {
    // TODO(yanghongtian): check hw_ascend_npu supports relu1 or not.
    act_mode = 7;
  } else if (act_type == "softsign") {
    act_mode = 8;
  } else if (act_type == "softplus") {
    act_mode = 9;
  } else {
    // TODO(yanghongtian): support more activation mode
    LOG(FATAL) << "[HW_ASCEND_NPU] Unsupported activation type " << act_type;
  }
  return act_mode;
}

}  // namespace hw_ascend_npu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle
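CvtShape left-pads shorter shapes up to 4-D NCHW. A quick standalone check (hypothetical, not part of the commit; the include path follows this file's layout):

#include <cassert>
#include <cstdint>
#include <vector>
#include "lite/kernels/hw_ascend_npu/bridges/utility.h"

void cvt_shape_example() {
  using paddle::lite::subgraph::hw_ascend_npu::CvtShape;
  std::vector<int64_t> chw = {3, 224, 224};
  // A 3-D shape gains a leading batch dimension of 1: {1, 3, 224, 224}.
  std::vector<int64_t> nchw = CvtShape(chw);
  assert(nchw.size() == 4 && nchw[0] == 1 && nchw[1] == 3);
}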
lite/kernels/hw_ascend_npu/bridges/utility.h (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#pragma once

#include <graph/tensor.h>
#include <graph/types.h>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"

namespace paddle {
namespace lite {
namespace subgraph {
namespace hw_ascend_npu {

// Type/tensor converters for converting Paddle type/tensor to hw ascend npu IR
// type
bool HasInputArg(const OpInfo* op_info,
                 const Scope* scope,
                 const std::string& argname);

ge::DataType CvtPrecisionType(PrecisionType itype);

ge::Format CvtDataLayoutType(DataLayoutType itype);

// Padding the shape to 4-dimensions(NCHW) for HW_ASCEND_NPU
std::vector<int64_t> CvtShape(const std::vector<int64_t>& in_shape);

std::vector<int64_t> CvtShape(const DDim& in_dims);

ge::Tensor CvtTensor(const Tensor& in_tensor,
                     std::vector<int64_t> out_shape = {},
                     DataLayoutType in_layout = DATALAYOUT(kNCHW));

// declared with the same signature as the definition in utility.cc
int CvtActMode(const std::string& act_type);

}  // namespace hw_ascend_npu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle
lite/kernels/hw_ascend_npu/subgraph_compute.cc (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#include "lite/kernels/hw_ascend_npu/subgraph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <utility>
#include "lite/backends/hw_ascend_npu/device.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/hw_ascend_npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/paddle_use_bridges.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace hw_ascend_npu {

int SubgraphEngine::BuildDeviceProgram() {
  int status = 0;
  // Convert all ops with their input vars and weights, and add them to the
  // HWAscendNPU IR graph
  subgraph::hw_ascend_npu::Graph graph;
  const auto& bridges = subgraph::Registry::Instance();
  for (auto& inst : origin_program_) {
    auto op = const_cast<OpLite*>(inst.op());
    CHECK(op);
    op->CheckShape();
    op->InferShape();
    std::string op_type = op->op_info()->Type();
    if (!bridges.Exists(op_type, TARGET(kHWAscendNPU))) {
      return subgraph::FAILED;
    }
    auto kernel = inst.kernel();
    status |= bridges.Select(op_type, TARGET(kHWAscendNPU))(
        reinterpret_cast<void*>(&graph), op, const_cast<KernelBase*>(kernel));
    if (subgraph::CHECK_FAILED(status)) {
      return subgraph::FAILED;
    }
  }
  // Collect the valid input and output nodes in the HWAscendNPU IR graph and
  // update the input and output names
  device_inames_.clear();
  device_onames_.clear();
  std::vector<ge::Operator> device_inodes;
  std::vector<ge::Operator> device_onodes;
  for (auto& input_name : input_names_) {
    if (graph.Has(input_name)) {
      if (graph.Get(input_name)->is_data()) {
        device_inodes.push_back(*graph.Get(input_name)->data());
        device_inames_.push_back(input_name);
      } else {
        LOG(WARNING) << "[HWAscendNPU] Input node " << input_name
                     << " is ignored because it is not a data node.";
      }
    } else {
      LOG(WARNING) << "[HWAscendNPU] Input node " << input_name
                   << " is ignored because it does not exist.";
    }
  }
  for (auto& output_name : output_names_) {
    if (graph.Has(output_name)) {
      device_onodes.push_back(*graph.Get(output_name)->data());
      device_onames_.push_back(output_name);
    } else {
      LOG(WARNING) << "[HWAscendNPU] Output node " << output_name
                   << " is ignored because it does not exist.";
    }
  }
  CHECK(!device_inames_.empty())
      << "[HWAscendNPU] No input nodes found for building NPU model";
  CHECK(!device_onames_.empty())
      << "[HWAscendNPU] No output nodes found for building NPU model";

  // Build the IR graph to om model as the device program
  if (device_program_map_.count(inputs_shape_) > 0) {
    return status;
  }
  auto device_client =
      lite::hw_ascend_npu::Device::Global().Build(device_inodes,
                                                  device_onodes);
  if (device_client == nullptr) {
    LOG(WARNING) << "[HWAscendNPU] Build model failed!";
    return subgraph::FAILED;
  }
  auto device_program = std::make_shared<device_program_t>(device_client);
  device_program_map_[inputs_shape_] = device_program;

  // Query and check the dimensions of valid input and output tensors
  std::vector<TensorDesc> device_idims, device_odims;
  if (device_program->client->GetModelIOTensorDim(&device_idims,
                                                  &device_odims) != 0) {
    LOG(WARNING) << "[HWAscendNPU] Get the dimensions of input and output "
                    "tensors failed!";
    return subgraph::FAILED;
  }
  device_program->device_idims = device_idims;
  device_program->device_odims = device_odims;
  CHECK_EQ(device_idims.size(), device_inames_.size());
  CHECK_EQ(device_odims.size(), device_onames_.size());
  origin_idims_.resize(device_inames_.size());
  origin_itensors_.resize(device_inames_.size());
  origin_odims_.resize(device_onames_.size());
  origin_otensors_.resize(device_onames_.size());
  for (size_t i = 0; i < device_inames_.size(); i++) {
    auto node = graph.Get(device_inames_[i]);
    auto precision = node->precision();
    auto layout = node->layout();
    origin_itensors_[i] = scope_->FindMutableTensor(device_inames_[i]);
    CHECK(origin_itensors_[i]);
    origin_idims_[i] = origin_itensors_[i]->dims();
    VLOG(3) << "[HWAscendNPU] Inputs[" << i
            << "] name: " << device_inames_[i]
            << " precision: " << PrecisionToStr(precision)
            << " layout: " << DataLayoutToStr(layout) << " dims: {"
            << device_idims[i].GetNumber() << ","
            << device_idims[i].GetChannel() << ","
            << device_idims[i].GetHeight() << ","
            << device_idims[i].GetWidth() << "}";
    // Prepare the device input tensors
    CHECK_EQ(origin_idims_[i].production(),
             device_idims[i].GetNumber() * device_idims[i].GetChannel() *
                 device_idims[i].GetHeight() * device_idims[i].GetWidth());
  }
  device_program->origin_idims = origin_idims_;
  for (size_t i = 0; i < device_onames_.size(); i++) {
    auto node = graph.Get(device_onames_[i]);
    auto precision = node->precision();
    auto layout = node->layout();
    origin_otensors_[i] = scope_->FindMutableTensor(device_onames_[i]);
    CHECK(origin_otensors_[i]);
    origin_odims_[i] = origin_otensors_[i]->dims();
    VLOG(3) << "[HWAscendNPU] Outputs[" << i
            << "] name: " << device_onames_[i]
            << " precision: " << PrecisionToStr(precision)
            << " layout: " << DataLayoutToStr(layout) << " dims: {"
            << device_odims[i].GetNumber() << ","
            << device_odims[i].GetChannel() << ","
            << device_odims[i].GetHeight() << ","
            << device_odims[i].GetWidth() << "}";
    // Prepare the device output tensors
    switch (precision) {
      case PRECISION(kFloat):
        origin_otensors_[i]->mutable_data<float>();
        break;
      case PRECISION(kBool):
        origin_otensors_[i]->mutable_data<bool>();
        break;
      case PRECISION(kInt8):
        origin_otensors_[i]->mutable_data<int8_t>();
        break;
      case PRECISION(kInt16):
        origin_otensors_[i]->mutable_data<int16_t>();
        break;
      case PRECISION(kInt32):
        origin_otensors_[i]->mutable_data<int32_t>();
        break;
      case PRECISION(kInt64):
        origin_otensors_[i]->mutable_data<int64_t>();
        break;
      default:
        LOG(FATAL) << "[HWAscendNPU] " << device_onames_[i]
                   << " can't mutable data with precision type "
                   << PrecisionToStr(precision);
        break;
    }
    device_program->origin_odims = origin_odims_;
    CHECK_EQ(origin_odims_[i].production(),
             device_odims[i].GetNumber() * device_odims[i].GetChannel() *
                 device_odims[i].GetHeight() * device_odims[i].GetWidth());
  }
  return status;
}

int SubgraphEngine::LaunchDeviceProgram() {
  // Copy the data of origin input tensors to the buffer of input HWAscendNPU
  // tensors
  auto device_program = device_program_map_[inputs_shape_];
  int ret = 0;
  ret = device_program->client->SetInput(origin_itensors_,
                                         device_program->origin_idims);
  if (ret != 0) {
    return ret;
  }
  device_program->client->CreateOutput(device_program->origin_odims);
  // run inference
  auto GetCurrentUS = []() -> double {
    struct timeval time;
    gettimeofday(&time, NULL);
    return 1e+6 * time.tv_sec + time.tv_usec;
  };
  auto start_time = GetCurrentUS();
  CHECK_EQ(device_program->client->Process(), 0);
  VLOG(3) << "[HWAscendNPU] Process cost " << GetCurrentUS() - start_time
          << " us";
  device_program->client->GetOutput(&origin_otensors_);
  return 0;
}

bool SubgraphEngine::InputShapeChanged() {
  std::vector<std::vector<int64_t>> new_shape;
  for (auto origin_itensor : origin_itensors_) {
    new_shape.push_back(origin_itensor->dims().Vectorize());
  }
  inputs_shape_ = new_shape;
  if (device_program_map_.count(inputs_shape_) > 0) {
    return false;
  }
  return true;
}

void SubgraphCompute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  engine_.reset(new SubgraphEngine(ctx_.get(),
                                   param.sub_block_idx,
                                   param.sub_block_desc,
                                   param.input_data_names,
                                   param.output_data_names,
                                   param.scope));
  CHECK(engine_);
  engine_->Build();
}

void SubgraphCompute::Run() {
  CHECK(engine_);
  engine_->Launch();
}

}  // namespace hw_ascend_npu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(subgraph,
                     kHWAscendNPU,
                     kAny,
                     kNCHW,
                     paddle::lite::kernels::hw_ascend_npu::SubgraphCompute,
                     def)
    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
    .Finalize();
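The engine keys compiled device programs by the full set of input shapes, so repeated shapes hit the cache while a shape change triggers a rebuild. A standalone sketch of that pattern (hypothetical Program type, not part of the commit):

#include <cstdint>
#include <map>
#include <memory>
#include <vector>

struct Program { /* a compiled om model handle would live here */ };

// One entry per distinct set of input shapes, mirroring device_program_map_.
using ShapeKey = std::vector<std::vector<int64_t>>;
std::map<ShapeKey, std::shared_ptr<Program>> cache;

std::shared_ptr<Program> lookup_or_build(const ShapeKey& shapes) {
  auto it = cache.find(shapes);
  if (it != cache.end()) return it->second;    // shapes seen before: reuse
  auto program = std::make_shared<Program>();  // else rebuild, cf. BuildDeviceProgram()
  cache[shapes] = program;
  return program;
}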
lite/kernels/hw_ascend_npu/subgraph_compute.h (new file, 0 → 100644):

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (same header as device.cc).

#pragma once

#include <graph/tensor.h>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "lite/backends/hw_ascend_npu/runtime.h"
#include "lite/core/kernel.h"
#include "lite/kernels/npu/bridges/engine.h"
#include "lite/kernels/npu/bridges/registry.h"

using HWAscendNPURuntime = paddle::lite::hw_ascend_npu::HWAscendNPURuntime;
using TensorDesc = paddle::lite::hw_ascend_npu::TensorDesc;

namespace paddle {
namespace lite {
namespace kernels {
namespace hw_ascend_npu {

class SubgraphEngine : public subgraph::Engine {
 public:
  SubgraphEngine(KernelContext* ctx,
                 int block_idx,
                 cpp::BlockDesc* block_desc,
                 const std::vector<std::string>& input_names,
                 const std::vector<std::string>& output_names,
                 Scope* scope)
      : subgraph::Engine(
            ctx, block_idx, block_desc, input_names, output_names, scope) {}

  struct device_program_t {
    explicit device_program_t(std::shared_ptr<HWAscendNPURuntime> _client)
        : client(_client) {}
    std::shared_ptr<HWAscendNPURuntime> client{nullptr};
    std::vector<DDim> origin_idims{};
    std::vector<DDim> origin_odims{};
    std::vector<TensorDesc> device_idims{};
    std::vector<TensorDesc> device_odims{};
  };

 protected:
  int BuildDeviceProgram() override;
  int LaunchDeviceProgram() override;
  bool InputShapeChanged() override;

  std::vector<std::vector<int64_t>> inputs_shape_{};
  std::map<std::vector<std::vector<int64_t>>,
           std::shared_ptr<device_program_t>>
      device_program_map_{};
  std::vector<std::string> device_inames_{};
  std::vector<std::string> device_onames_{};
};

class SubgraphCompute
    : public KernelLite<TARGET(kHWAscendNPU), PRECISION(kAny)> {
 public:
  using param_t = operators::SubgraphParam;
  void PrepareForRun() override;
  void Run() override;
  virtual ~SubgraphCompute() = default;

 private:
  std::unique_ptr<SubgraphEngine> engine_;
};

}  // namespace hw_ascend_npu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
lite/kernels/npu/bridges/CMakeLists.txt:

-if (NOT LITE_WITH_NPU AND NOT LITE_WITH_XTCL AND NOT LITE_WITH_BM AND NOT LITE_WITH_MLU)
+if (NOT LITE_WITH_NPU AND NOT LITE_WITH_XTCL AND NOT LITE_WITH_BM AND NOT LITE_WITH_MLU
+    AND NOT LITE_WITH_HW_ASCEND_NPU)
   return()
 endif()