Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
71d9dbd1
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
71d9dbd1
编写于
8月 03, 2020
作者:
Q
Qi Li
提交者:
GitHub
8月 03, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[ASCEND] add concat op and update ascend subraph, test=develop (#4031)
上级
88c05d74
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
405 addition
and
316 deletion
+405
-316
lite/backends/huawei_ascend_npu/device.cc
lite/backends/huawei_ascend_npu/device.cc
+9
-0
lite/backends/huawei_ascend_npu/model_client.cc
lite/backends/huawei_ascend_npu/model_client.cc
+74
-159
lite/backends/huawei_ascend_npu/model_client.h
lite/backends/huawei_ascend_npu/model_client.h
+29
-18
lite/backends/huawei_ascend_npu/utils.h
lite/backends/huawei_ascend_npu/utils.h
+62
-4
lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt
lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt
+2
-0
lite/kernels/huawei_ascend_npu/bridges/act_op.cc
lite/kernels/huawei_ascend_npu/bridges/act_op.cc
+8
-0
lite/kernels/huawei_ascend_npu/bridges/concat_op.cc
lite/kernels/huawei_ascend_npu/bridges/concat_op.cc
+124
-0
lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
+40
-25
lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc
lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc
+24
-10
lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h
lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h
+1
-0
lite/kernels/huawei_ascend_npu/bridges/utility.cc
lite/kernels/huawei_ascend_npu/bridges/utility.cc
+0
-55
lite/kernels/huawei_ascend_npu/bridges/utility.h
lite/kernels/huawei_ascend_npu/bridges/utility.h
+4
-3
lite/kernels/huawei_ascend_npu/subgraph_compute.cc
lite/kernels/huawei_ascend_npu/subgraph_compute.cc
+13
-41
lite/tests/kernels/concat_compute_test.cc
lite/tests/kernels/concat_compute_test.cc
+6
-0
lite/tests/kernels/conv_compute_test.cc
lite/tests/kernels/conv_compute_test.cc
+9
-1
未找到文件。
lite/backends/huawei_ascend_npu/device.cc
浏览文件 @
71d9dbd1
...
...
@@ -67,6 +67,15 @@ bool Device::Build(std::vector<ge::Operator>& input_nodes, // NOLINT
std
::
lock_guard
<
std
::
mutex
>
lock
(
device_mutex_
);
// Convert the HiAI IR graph to the HiAI om model
ge
::
Graph
ir_graph
(
"graph"
);
// Set the "index" attr on each input node if there is more than one input node
if
(
input_nodes
.
size
()
>
1
)
{
int
idx
=
0
;
for
(
auto
node
:
input_nodes
)
{
node
.
SetAttr
(
"index"
,
idx
);
idx
++
;
}
}
VLOG
(
3
)
<<
"Getting input node size "
<<
input_nodes
.
size
();
ir_graph
.
SetInputs
(
input_nodes
).
SetOutputs
(
output_nodes
);
// Build IR model
...
...
lite/backends/huawei_ascend_npu/model_client.cc
浏览文件 @
71d9dbd1
...
...
@@ -24,50 +24,28 @@ bool AclModelClient::LoadFromMem(const void* data, uint32_t size) {
return
true
;
}
auto
ret
=
aclmdlQuerySizeFromMem
(
data
,
size
,
&
model_memory_size_
,
&
model_weight_size_
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] query model size from memory failed!"
;
return
false
;
}
ret
=
aclrtMalloc
(
&
model_memory_ptr_
,
model_memory_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] malloc buffer for model memory "
"failed, require size is "
<<
model_memory_size_
;
return
false
;
}
ret
=
aclrtMalloc
(
&
model_weight_ptr_
,
model_weight_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] malloc buffer for model weigth "
"failed, require size is "
<<
model_weight_size_
;
return
false
;
}
ret
=
aclmdlLoadFromMemWithMem
(
data
,
size
,
&
model_id_
,
model_memory_ptr_
,
model_memory_size_
,
model_weight_ptr_
,
model_weight_size_
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] Load model from memory failed!"
;
return
false
;
}
ACL_CALL
(
aclmdlQuerySizeFromMem
(
data
,
size
,
&
model_memory_size_
,
&
model_weight_size_
));
ACL_CALL
(
aclrtMalloc
(
&
model_memory_ptr_
,
model_memory_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
));
ACL_CALL
(
aclrtMalloc
(
&
model_weight_ptr_
,
model_weight_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
));
ACL_CALL
(
aclmdlLoadFromMemWithMem
(
data
,
size
,
&
model_id_
,
model_memory_ptr_
,
model_memory_size_
,
model_weight_ptr_
,
model_weight_size_
));
model_desc_
=
aclmdlCreateDesc
();
if
(
model_desc_
==
nullptr
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] create model description failed!"
;
return
false
;
}
ret
=
aclmdlGetDesc
(
model_desc_
,
model_id_
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] get model description failed!"
;
return
false
;
}
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] AclModelClient LoadFromMem success."
;
ACL_CALL
(
aclmdlGetDesc
(
model_desc_
,
model_id_
));
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Load model form memeory success."
;
load_flag_
=
true
;
return
true
;
}
...
...
@@ -77,49 +55,28 @@ bool AclModelClient::LoadFromFile(const char* model_path) {
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] model is already loaded!"
;
return
true
;
}
auto
ret
=
aclmdlQuerySize
(
model_path
,
&
model_memory_size_
,
&
model_weight_size_
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] query model size from file failed!"
;
return
false
;
}
ret
=
aclrtMalloc
(
&
model_memory_ptr_
,
model_memory_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] malloc buffer for model memory "
"failed, require size is "
<<
model_memory_size_
;
return
false
;
}
ret
=
aclrtMalloc
(
&
model_weight_ptr_
,
model_weight_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] malloc buffer for model weigth "
"failed, require size is "
<<
model_weight_size_
;
return
false
;
}
ret
=
aclmdlLoadFromFileWithMem
(
model_path
,
&
model_id_
,
model_memory_ptr_
,
model_memory_size_
,
model_weight_ptr_
,
model_weight_size_
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] Load model from file failed!"
;
return
false
;
}
ACL_CALL
(
aclmdlQuerySize
(
model_path
,
&
model_memory_size_
,
&
model_weight_size_
));
ACL_CALL
(
aclrtMalloc
(
&
model_memory_ptr_
,
model_memory_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
));
ACL_CALL
(
aclrtMalloc
(
&
model_weight_ptr_
,
model_weight_size_
,
ACL_MEM_MALLOC_HUGE_FIRST
));
ACL_CALL
(
aclmdlLoadFromFileWithMem
(
model_path
,
&
model_id_
,
model_memory_ptr_
,
model_memory_size_
,
model_weight_ptr_
,
model_weight_size_
));
model_desc_
=
aclmdlCreateDesc
();
if
(
model_desc_
==
nullptr
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] create model description failed!"
;
return
false
;
}
ret
=
aclmdlGetDesc
(
model_desc_
,
model_id_
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] get model description failed!"
;
return
false
;
}
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Loading model file success:"
<<
model_path
;
ACL_CALL
(
aclmdlGetDesc
(
model_desc_
,
model_id_
));
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Load model form file success: "
<<
model_path
;
load_flag_
=
true
;
return
true
;
}
...
...
@@ -132,33 +89,25 @@ bool AclModelClient::GetModelIOTensorDim(
return
false
;
}
size_t
input_num
=
aclmdlGetNumInputs
(
model_desc_
);
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] input num
h
er is "
<<
input_num
;
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] input num
b
er is "
<<
input_num
;
for
(
size_t
i
=
0
;
i
<
input_num
;
i
++
)
{
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] printing input ["
<<
i
<<
"] ...."
;
aclmdlIODims
input_dim
;
aclmdlGetInputDims
(
model_desc_
,
i
,
&
input_dim
);
ACL_CALL
(
aclmdlGetInputDims
(
model_desc_
,
i
,
&
input_dim
)
);
aclDataType
data_type
=
aclmdlGetInputDataType
(
model_desc_
,
i
);
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] data_type of inputs["
<<
i
<<
"] is "
<<
data_type
;
aclFormat
data_format
=
aclmdlGetInputFormat
(
model_desc_
,
i
);
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] data_format of inputs["
<<
i
<<
"] is "
<<
data_format
;
TensorDesc
tensor_desc
=
TensorDesc
(
data_type
,
input_dim
,
data_format
);
input_tensor
->
push_back
(
tensor_desc
);
}
size_t
output_num
=
aclmdlGetNumOutputs
(
model_desc_
);
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] output num
h
er is "
<<
output_num
;
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] output num
b
er is "
<<
output_num
;
for
(
size_t
i
=
0
;
i
<
output_num
;
i
++
)
{
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] printing output ["
<<
i
<<
"] ...."
;
aclmdlIODims
output_dim
;
aclmdlGetOutputDims
(
model_desc_
,
i
,
&
output_dim
);
ACL_CALL
(
aclmdlGetOutputDims
(
model_desc_
,
i
,
&
output_dim
)
);
aclDataType
data_type
=
aclmdlGetOutputDataType
(
model_desc_
,
i
);
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] data_type of outputs["
<<
i
<<
"] is "
<<
data_type
;
aclFormat
data_format
=
aclmdlGetOutputFormat
(
model_desc_
,
i
);
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] data_format of outputs["
<<
i
<<
"] is "
<<
data_format
;
TensorDesc
tensor_desc
=
TensorDesc
(
data_type
,
output_dim
,
data_format
);
output_tensor
->
push_back
(
tensor_desc
);
}
...
...
@@ -181,28 +130,16 @@ bool AclModelClient::GetTensorFromDataset(
uint32_t
device_size
=
aclGetDataBufferSize
(
buffer_device
);
void
*
tensor_data
=
nullptr
;
aclError
ret
=
aclrtMallocHost
(
&
tensor_data
,
device_size
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] aclrtMallocHost failed, ret "
<<
ret
;
return
false
;
}
ret
=
aclrtMemcpy
(
tensor_data
,
device_size
,
device_data
,
device_size
,
ACL_MEMCPY_DEVICE_TO_HOST
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] aclrtMemcpy failed, ret "
<<
ret
;
return
false
;
}
if
(
output_tensor
->
at
(
i
)
->
SetData
(
reinterpret_cast
<
uint8_t
*>
(
tensor_data
),
device_size
)
!=
ge
::
GRAPH_SUCCESS
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] SetData to output tensor failed"
;
return
false
;
}
}
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Get output tensor from output dataset succeed."
;
ACL_CALL
(
aclrtMallocHost
(
&
tensor_data
,
device_size
));
ACL_CALL
(
aclrtMemcpy
(
tensor_data
,
device_size
,
device_data
,
device_size
,
ACL_MEMCPY_DEVICE_TO_HOST
));
ATC_CALL
(
output_tensor
->
at
(
i
)
->
SetData
(
reinterpret_cast
<
uint8_t
*>
(
tensor_data
),
device_size
));
}
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Get output tensor from dataset succeed."
;
return
true
;
}
...
...
@@ -218,37 +155,33 @@ void AclModelClient::CreateInputDataset(
auto
item
=
input_tensor
->
at
(
i
);
size_t
buffer_size
=
item
->
GetSize
();
void
*
buffer_device
=
nullptr
;
aclError
ret
=
aclrtMalloc
(
&
buffer_device
,
buffer_size
,
ACL_MEM_MALLOC_NORMAL_ONLY
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] input malloc device buffer failed. size is "
<<
buffer_size
;
return
;
}
ACL_CALL
(
aclrtMalloc
(
&
buffer_device
,
buffer_size
,
ACL_MEM_MALLOC_NORMAL_ONLY
));
void
*
buffer_data
=
reinterpret_cast
<
void
*>
(
item
->
GetData
());
ret
=
aclrtMemcpy
(
buffer_device
,
buffer_size
,
buffer_data
,
buffer_size
,
ACL_MEMCPY_HOST_TO_DEVICE
);
auto
ret
=
aclrtMemcpy
(
buffer_device
,
buffer_size
,
buffer_data
,
buffer_size
,
ACL_MEMCPY_HOST_TO_DEVICE
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] input memcpy failed, buffer size is "
<<
buffer_size
;
aclrtFree
(
buffer_device
);
ACL_CALL
(
aclrtFree
(
buffer_device
)
);
return
;
}
aclDataBuffer
*
data_buffer
=
aclCreateDataBuffer
(
buffer_device
,
buffer_size
);
if
(
data_buffer
==
nullptr
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] output aclCreateDataBuffer failed!"
;
aclrtFree
(
buffer_device
);
ACL_CALL
(
aclrtFree
(
buffer_device
)
);
return
;
}
if
(
aclmdlAddDatasetBuffer
(
input_dataset_
,
data_buffer
)
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] input aclmdlAddDatasetBuffer failed!"
;
aclrtFree
(
buffer_device
);
aclDestroyDataBuffer
(
data_buffer
);
ACL_CALL
(
aclrtFree
(
buffer_device
)
);
ACL_CALL
(
aclDestroyDataBuffer
(
data_buffer
)
);
return
;
}
}
...
...
@@ -266,26 +199,20 @@ void AclModelClient::CreateOutputDataset(
for
(
size_t
i
=
0
;
i
<
output_size
;
i
++
)
{
size_t
buffer_size
=
aclmdlGetOutputSizeByIndex
(
model_desc_
,
i
);
void
*
buffer_device
=
nullptr
;
aclError
ret
=
aclrtMalloc
(
&
buffer_device
,
buffer_size
,
ACL_MEM_MALLOC_NORMAL_ONLY
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] output malloc device buffer failed. size is "
<<
buffer_size
;
return
;
}
ACL_CALL
(
aclrtMalloc
(
&
buffer_device
,
buffer_size
,
ACL_MEM_MALLOC_NORMAL_ONLY
));
aclDataBuffer
*
data_buffer
=
aclCreateDataBuffer
(
buffer_device
,
buffer_size
);
if
(
data_buffer
==
nullptr
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] output aclCreateDataBuffer failed!"
;
aclrtFree
(
buffer_device
);
ACL_CALL
(
aclrtFree
(
buffer_device
)
);
return
;
}
if
(
aclmdlAddDatasetBuffer
(
output_dataset_
,
data_buffer
)
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"[HUAWEI_ASCEND_NPU] output aclmdlAddDatasetBuffer failed!"
;
aclrtFree
(
buffer_device
);
aclDestroyDataBuffer
(
data_buffer
);
ACL_CALL
(
aclrtFree
(
buffer_device
)
);
ACL_CALL
(
aclDestroyDataBuffer
(
data_buffer
)
);
return
;
}
}
...
...
@@ -332,21 +259,13 @@ void AclModelClient::DestroyDataset(aclmdlDataset** dataset) {
aclDataBuffer
*
buffer_device
=
aclmdlGetDatasetBuffer
(
*
dataset
,
i
);
void
*
device_data
=
aclGetDataBufferAddr
(
buffer_device
);
if
(
device_data
==
nullptr
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] failed to get data buffer of deivce data!"
;
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] failed to get data buffer!"
;
}
else
{
if
(
aclrtFree
(
device_data
)
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] failed to free deivce data!"
;
}
}
if
(
aclDestroyDataBuffer
(
buffer_device
)
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] failed to destroy deivce data buffer!"
;
ACL_CALL
(
aclrtFree
(
device_data
));
}
ACL_CALL
(
aclDestroyDataBuffer
(
buffer_device
));
}
if
(
aclmdlDestroyDataset
(
*
dataset
)
!=
ACL_ERROR_NONE
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] failed to destroy dataset!"
;
}
ACL_CALL
(
aclmdlDestroyDataset
(
*
dataset
));
*
dataset
=
nullptr
;
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Destroy dataset success."
;
}
...
...
@@ -361,24 +280,20 @@ bool AclModelClient::UnloadModel() {
DestroyDataset
(
&
input_dataset_
);
DestroyDataset
(
&
output_dataset_
);
aclError
ret
=
aclmdlUnload
(
model_id_
);
if
(
ret
!=
ACL_ERROR_NONE
)
{
LOG
(
ERROR
)
<<
"unload model failed, model id is "
<<
model_id_
;
return
false
;
}
ACL_CALL
(
aclmdlUnload
(
model_id_
));
if
(
model_desc_
!=
nullptr
)
{
(
void
)
aclmdlDestroyDesc
(
model_desc_
);
ACL_CALL
(
aclmdlDestroyDesc
(
model_desc_
)
);
model_desc_
=
nullptr
;
}
if
(
model_memory_ptr_
!=
nullptr
)
{
aclrtFree
(
model_memory_ptr_
);
ACL_CALL
(
aclrtFree
(
model_memory_ptr_
)
);
model_memory_ptr_
=
nullptr
;
model_memory_size_
=
0
;
}
if
(
model_weight_ptr_
!=
nullptr
)
{
aclrtFree
(
model_weight_ptr_
);
ACL_CALL
(
aclrtFree
(
model_weight_ptr_
)
);
model_weight_ptr_
=
nullptr
;
model_weight_size_
=
0
;
}
...
...
lite/backends/huawei_ascend_npu/model_client.h
浏览文件 @
71d9dbd1
...
...
@@ -35,32 +35,39 @@ class TensorDesc {
ge_tensor_desc_
=
new
ge
::
TensorDesc
(
GetGeShape
(
dims
),
GetGeFormat
(
format
),
GetGeDataType
(
data_type
));
CHECK
(
ge_tensor_desc_
!=
nullptr
);
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Getting data shape : "
<<
repr
();
}
~
TensorDesc
()
{
ge_tensor_desc_
=
nullptr
;
}
int64_t
GetNumber
()
const
{
return
ge_tensor_desc_
->
GetShape
().
GetDim
(
dim_order
[
0
]);
}
int64_t
GetChannel
()
const
{
return
ge_tensor_desc_
->
GetShape
().
GetDim
(
dim_order
[
1
]);
}
int64_t
GetHeight
()
const
{
return
ge_tensor_desc_
->
GetShape
().
GetDim
(
dim_order
[
2
]);
const
ge
::
TensorDesc
&
GetGeTensorDesc
()
const
{
return
*
ge_tensor_desc_
;
}
std
::
string
repr
()
const
{
STL
::
stringstream
ss
;
size_t
dim_size
=
ge_tensor_desc_
->
GetShape
().
GetDimNum
();
if
(
dim_size
==
0
)
{
ss
<<
"{}"
;
return
ss
.
str
();
}
ss
<<
"{"
;
for
(
size_t
i
=
0
;
i
<
dim_size
-
1
;
i
++
)
{
ss
<<
ge_tensor_desc_
->
GetShape
().
GetDim
(
i
)
<<
","
;
}
ss
<<
ge_tensor_desc_
->
GetShape
().
GetDim
(
dim_size
-
1
);
ss
<<
"}"
;
return
ss
.
str
();
}
int64_t
GetWidth
()
const
{
return
ge_tensor_desc_
->
GetShape
().
GetDim
(
dim_order
[
3
]);
int64_t
production
()
const
{
return
ge_tensor_desc_
->
GetShape
().
GetShapeSize
();
}
const
ge
::
TensorDesc
&
GetGeTensorDesc
()
const
{
return
*
ge_tensor_desc_
;
}
private:
ge
::
Shape
GetGeShape
(
aclmdlIODims
dims
)
{
ge
::
Shape
ge_shape
({
0
,
0
,
0
,
0
});
auto
shape_data
=
std
::
vector
<
int64_t
>
({
1L
,
1L
,
1L
,
1L
});
shape_data
.
resize
(
dims
.
dimCount
);
ge
::
Shape
ge_shape
(
shape_data
);
for
(
size_t
i
=
0
;
i
<
dims
.
dimCount
;
i
++
)
{
if
(
ge_shape
.
SetDim
(
i
,
dims
.
dims
[
i
])
!=
ge
::
GRAPH_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] ge::Shape SetDim failed!"
;
}
else
{
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Setting Ge Shape["
<<
i
<<
"] = <"
<<
dims
.
dims
[
i
]
<<
">"
;
}
ATC_CALL
(
ge_shape
.
SetDim
(
i
,
dims
.
dims
[
i
]));
}
return
ge_shape
;
}
...
...
@@ -80,6 +87,8 @@ class TensorDesc {
LOG
(
FATAL
)
<<
"[HUAWEI_ASCEND_NPU] format not supported:"
<<
format
;
break
;
}
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Getting data format : "
<<
CvtFormat
(
ge_format
);
return
ge_format
;
}
ge
::
DataType
GetGeDataType
(
aclDataType
data_type
)
{
...
...
@@ -110,6 +119,8 @@ class TensorDesc {
LOG
(
FATAL
)
<<
"[HUAWEI_ASCEND_NPU] data type not supported!"
;
break
;
}
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Getting data type : "
<<
CvtDataType
(
ge_datatype
);
return
ge_datatype
;
}
...
...
lite/backends/huawei_ascend_npu/utils.h
浏览文件 @
71d9dbd1
...
...
@@ -13,6 +13,8 @@
// limitations under the License.
#pragma once
#include <string>
#include "acl/acl.h"
#include "ge/ge_api_types.h"
#include "ge/ge_ir_build.h"
...
...
@@ -21,11 +23,16 @@
#include "graph/tensor.h"
#include "graph/types.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/replace_stl/stream.h"
/*
* This file contains some Huawei Ascend NPU specific utils.
*/
namespace
paddle
{
namespace
lite
{
namespace
huawei_ascend_npu
{
#define ACL_CALL(msg) \
CHECK_EQ(reinterpret_cast<aclError>(msg), ACL_ERROR_NONE) \
<< (msg) << " Huawei Ascend NPU ACL Error: " \
...
...
@@ -38,10 +45,6 @@
<< ::paddle::lite::huawei_ascend_npu::AtcErrorInfo( \
reinterpret_cast<uint32_t>(msg))
namespace
paddle
{
namespace
lite
{
namespace
huawei_ascend_npu
{
static
const
char
*
AtcErrorInfo
(
uint32_t
error
)
{
switch
(
error
)
{
#define LITE_ATC_ERROR_INFO(xx) \
...
...
@@ -123,6 +126,61 @@ static const char* AclErrorInfo(int error) {
}
}
// Returns a human-readable "NAME = value" label for a GE tensor format; used
// only to make log messages readable.
//
// @param format  GE format enum value to describe.
// @return        Reference to a function-local static string; it remains
//                valid for the lifetime of the program.
// Fails the CHECK if `format` does not index into the label table below.
static const std::string& CvtFormat(ge::Format format) {
  static const std::string format2string[] = {
      "FORMAT_NCHW = 0",
      "FORMAT_NHWC = 1",
      "FORMAT_ND = 2",
      "FORMAT_NC1HWC0 = 3",
      "FORMAT_FRACTAL_Z = 4",
      "FORMAT_NC1C0HWPAD = 5",
      "FORMAT_NHWC1C0 = 6",
      "FORMAT_FSR_NCHW = 7",
      "FORMAT_FRACTAL_DECONV = 8",
      "FORMAT_C1HWNC0 = 9",
      "FORMAT_FRACTAL_DECONV_TRANSPOSE = 10",
      "FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11",
      "FORMAT_NC1HWC0_C04 = 12",
      "FORMAT_FRACTAL_Z_C04 = 13",
      "FORMAT_CHWN = 14",
      "FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15",
      "FORMAT_HWCN = 16",
      "FORMAT_NC1KHKWHWC0 = 17",
      "FORMAT_BN_WEIGHT = 18",
      "FORMAT_FILTER_HWCK = 19",
      "FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20",
      "FORMAT_HASHTABLE_LOOKUP_KEYS = 21",
      "FORMAT_HASHTABLE_LOOKUP_VALUE = 22",
      "FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23",
      "FORMAT_HASHTABLE_LOOKUP_HITS = 24"};
  // Derive the bound from the table itself so the limit can never drift away
  // from the number of labels actually present.
  static const size_t kFormatCount =
      sizeof(format2string) / sizeof(format2string[0]);
  // Index as unsigned: a negative enum value wraps to a huge number and is
  // rejected by the same upper-bound check instead of reading out of bounds.
  const size_t x = static_cast<size_t>(format);
  CHECK_LT(x, kFormatCount);
  return format2string[x];
}
// Returns a human-readable "NAME=value" label for a GE data type; used only
// to make log messages readable.
//
// @param data_type  GE data type enum value to describe.
// @return           Reference to a function-local static string; it remains
//                   valid for the lifetime of the program.
// Fails the CHECK if `data_type` does not index into the label table below.
static const std::string& CvtDataType(ge::DataType data_type) {
  static const std::string datatype2string[] = {"DT_FLOAT=0",
                                                "DT_FLOAT16=1",
                                                "DT_INT8=2",
                                                "DT_INT32=3",
                                                "DT_UINT8=4",
                                                "Unknown=5",
                                                "DT_INT16=6",
                                                "DT_UINT16=7",
                                                "DT_UINT32=8",
                                                "DT_INT64=9",
                                                "DT_UINT64=10",
                                                "DT_DOUBLE=11",
                                                "DT_BOOL=12",
                                                "DT_STRING=13"};
  // Derive the bound from the table itself so the limit can never drift away
  // from the number of labels actually present.
  static const size_t kDataTypeCount =
      sizeof(datatype2string) / sizeof(datatype2string[0]);
  // Index as unsigned: a negative enum value wraps to a huge number and is
  // rejected by the same upper-bound check instead of reading out of bounds.
  const size_t x = static_cast<size_t>(data_type);
  CHECK_LT(x, kDataTypeCount);
  return datatype2string[x];
}
}
// namespace huawei_ascend_npu
}
// namespace lite
}
// namespace paddle
lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt
浏览文件 @
71d9dbd1
...
...
@@ -10,6 +10,7 @@ set(huawei_ascend_npu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bri
lite_cc_library
(
subgraph_bridge_act_op_huawei_ascend_npu SRCS act_op.cc DEPS
${
huawei_ascend_npu_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_conv_op_huawei_ascend_npu SRCS conv_op.cc DEPS
${
huawei_ascend_npu_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_interpolate_op_huawei_ascend_npu SRCS interpolate_op.cc DEPS
${
huawei_ascend_npu_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_concat_op_huawei_ascend_npu SRCS concat_op.cc DEPS
${
huawei_ascend_npu_subgraph_bridge_deps
}
)
set
(
huawei_ascend_npu_subgraph_bridges
subgraph_bridge_registry
...
...
@@ -18,4 +19,5 @@ set(huawei_ascend_npu_subgraph_bridges
subgraph_bridge_act_op_huawei_ascend_npu
subgraph_bridge_conv_op_huawei_ascend_npu
subgraph_bridge_interpolate_op_huawei_ascend_npu
subgraph_bridge_concat_op_huawei_ascend_npu
CACHE INTERNAL
"huawei_ascend_npu_subgraph_bridges"
)
lite/kernels/huawei_ascend_npu/bridges/act_op.cc
浏览文件 @
71d9dbd1
...
...
@@ -49,6 +49,10 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
act_node
=
graph
->
template
Add
<
ActType
>(
out_name
);
auto
act_op
=
act_node
->
template
data
<
ActType
>();
act_op
->
set_input_x
(
*
x_node
->
data
());
TENSOR_UPDATE_INPUT
(
act_op
,
x
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
x_node
->
precision
()));
TENSOR_UPDATE_OUTPUT
(
act_op
,
y
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
act_node
->
precision
()));
return
SUCCESS
;
}
...
...
@@ -84,6 +88,10 @@ int ActConverter<ge::op::LeakyRelu>(void* ctx, OpLite* op, KernelBase* kernel) {
// only for leaky_relu
auto
alpha
=
op_info
->
GetAttr
<
float
>
(
"alpha"
);
act_op
->
set_attr_negative_slope
(
alpha
);
TENSOR_UPDATE_INPUT
(
act_op
,
x
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
x_node
->
precision
()));
TENSOR_UPDATE_OUTPUT
(
act_op
,
y
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
act_node
->
precision
()));
return
SUCCESS
;
}
...
...
lite/kernels/huawei_ascend_npu/bridges/concat_op.cc
0 → 100644
浏览文件 @
71d9dbd1
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace
paddle
{
namespace
lite
{
namespace
subgraph
{
namespace
huawei_ascend_npu
{
// Converts a Paddle-Lite "concat" op into a Huawei Ascend NPU IR node and
// adds it to the subgraph being built.
//
// Two IR ops are used, depending on how the concat axis is supplied:
//   * "AxisTensor" input present -> ge::op::Concat (axis wired as an input)
//   * otherwise                  -> ge::op::ConcatD (axis set as an attribute)
//
// @param ctx     Opaque pointer to the bridge Graph; must not be null.
// @param op      The concat op to convert; must not be null.
// @param kernel  Unused by this converter.
// @return        SUCCESS once the node and its inputs have been registered.
int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  // Use the backend's own tag so this line groups with the other
  // Huawei Ascend NPU bridge logs (the original "[NPU]" tag was a leftover
  // from another backend).
  VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " << op_type << " ... ";

  // Get input and output vars and op attributes
  auto x_names = op_info->Input("X");
  auto axis = op_info->GetAttr<int>("axis");
  auto out_name = op_info->Output("Out").front();
  auto num = x_names.size();

  // NOTE(review): the TENSOR_UPDATE_* macros are defined outside this block
  // and appear to build identifiers from their op/attr arguments, so the
  // local names passed to them (concat_op, x, idx) are kept unchanged.
  if (op_info->HasInput("AxisTensor")) {
    // Axis node: reuse it if the graph already holds it, otherwise add it
    // from the tensor stored in the scope.
    auto axis_name = op_info->Input("AxisTensor").front();
    auto axis_tensor = scope->FindMutableTensor(axis_name);
    std::shared_ptr<Node> axis_node = nullptr;
    if (graph->Has(axis_name)) {
      axis_node = graph->Get(axis_name);
    } else {
      axis_node = graph->Add(axis_name, *axis_tensor);
    }

    // Concat node
    auto concat_node = graph->Add<ge::op::Concat>(out_name);
    auto concat_op = concat_node->data<ge::op::Concat>();

    // Axis is supplied as an input of the op
    concat_op->set_input_concat_dim(*axis_node->data());
    TENSOR_UPDATE_INPUT(concat_op,
                        concat_dim,
                        ge::FORMAT_NCHW,
                        CvtPrecisionType(axis_node->precision()));

    // Dynamic inputs: one slot per entry of "X", in order
    concat_op->set_attr_N(num);
    concat_op->create_dynamic_input_x(num);
    int idx = 0;
    for (auto& x_name : x_names) {
      auto x = scope->FindMutableTensor(x_name);
      std::shared_ptr<Node> x_node = nullptr;
      if (graph->Has(x_name)) {
        x_node = graph->Get(x_name);
      } else {
        x_node = graph->Add(x_name, *x);
      }
      concat_op->set_dynamic_input_x(idx, *x_node->data());
      TENSOR_UPDATE_DYNAMIC_INPUT(concat_op,
                                  x,
                                  idx,
                                  ge::FORMAT_NCHW,
                                  CvtPrecisionType(x_node->precision()));
      idx++;
    }
    TENSOR_UPDATE_OUTPUT(concat_op,
                         y,
                         ge::FORMAT_NCHW,
                         CvtPrecisionType(concat_node->precision()));
  } else {
    // ConcatD node: the axis is a compile-time attribute
    auto concat_node = graph->Add<ge::op::ConcatD>(out_name);
    auto concat_op = concat_node->data<ge::op::ConcatD>();
    concat_op->set_attr_concat_dim(axis);

    // Dynamic inputs: one slot per entry of "X", in order
    concat_op->set_attr_N(num);
    concat_op->create_dynamic_input_x(num);
    int idx = 0;
    for (auto& x_name : x_names) {
      auto x = scope->FindMutableTensor(x_name);
      std::shared_ptr<Node> x_node = nullptr;
      if (graph->Has(x_name)) {
        x_node = graph->Get(x_name);
      } else {
        x_node = graph->Add(x_name, *x);
      }
      concat_op->set_dynamic_input_x(idx, *x_node->data());
      TENSOR_UPDATE_DYNAMIC_INPUT(concat_op,
                                  x,
                                  idx,
                                  ge::FORMAT_NCHW,
                                  CvtPrecisionType(x_node->precision()));
      idx++;
    }
    TENSOR_UPDATE_OUTPUT(concat_op,
                         y,
                         ge::FORMAT_NCHW,
                         CvtPrecisionType(concat_node->precision()));
  }
  return SUCCESS;
}
}
// namespace huawei_ascend_npu
}
// namespace subgraph
}
// namespace lite
}
// namespace paddle
REGISTER_SUBGRAPH_BRIDGE
(
concat
,
kHuaweiAscendNPU
,
paddle
::
lite
::
subgraph
::
huawei_ascend_npu
::
ConcatConverter
);
lite/kernels/huawei_ascend_npu/bridges/conv_op.cc
浏览文件 @
71d9dbd1
...
...
@@ -35,7 +35,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
input_name
=
op_info
->
Input
(
"Input"
).
front
();
auto
input
=
scope
->
FindMutableTensor
(
input_name
);
auto
input_dims
=
input
->
dims
();
ge
::
DataType
ge_data_type
=
CvtPrecisionType
(
input
->
precision
());
auto
filter_name
=
op_info
->
Input
(
"Filter"
).
front
();
auto
filter
=
scope
->
FindMutableTensor
(
filter_name
);
...
...
@@ -99,6 +98,22 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
input_dims
,
filter_dims
);
// Check Restrictions: HxW(input) == HxW(filter) if output feature h*w = 1*1
if
(
output_dims
[
2
]
==
1
&&
output_dims
[
3
]
==
1
)
{
int
input_h
=
input_dims
[
2
]
+
paddings
[
0
]
+
paddings
[
1
];
int
input_w
=
input_dims
[
3
]
+
paddings
[
2
]
+
paddings
[
3
];
int
filter_h
=
(
filter_dims
[
2
]
-
1
)
*
dilations
[
0
]
+
1
;
int
filter_w
=
(
filter_dims
[
3
]
-
1
)
*
dilations
[
1
]
+
1
;
CHECK_EQ
(
input_h
,
filter_h
)
<<
"[HUAWEI_ASCEND_NPU] Huawei Ascend NPU DDK "
"restriction: if output HxW = 1x1, then "
"input height after padding should equal to "
"filter height after dilation"
;
CHECK_EQ
(
input_w
,
filter_w
)
<<
"[HUAWEI_ASCEND_NPU] Huawei Ascend NPU DDK "
"restriction: if output HxW = 1x1, then "
"input width after padding should equal to "
"filter width after dilation"
;
}
// Check depthwise mode, and decide whether use DepthwiseConv2D Op
bool
use_depthwise_conv
=
false
;
bool
is_depthwise_mode
=
(
ic
==
groups
&&
oc
==
groups
&&
groups
!=
1
);
...
...
@@ -148,20 +163,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
}
// Ascend must update convop desc, or IR model build will fail
ge
::
TensorDesc
conv2d_input_desc_x
(
ge
::
Shape
(
CvtShape
(
input_dims
)),
ge
::
FORMAT_NCHW
,
ge_data_type
);
ge
::
TensorDesc
conv2d_input_desc_filter
(
ge
::
Shape
(
CvtShape
(
filter_dims
)),
ge
::
FORMAT_NCHW
,
ge_data_type
);
ge
::
TensorDesc
conv2d_input_desc_bias
(
ge
::
Shape
(
bias_shape
),
ge
::
FORMAT_ND
,
ge_data_type
);
ge
::
TensorDesc
conv2d_output_desc_y
(
ge
::
Shape
(
CvtShape
(
output_dims
)),
ge
::
FORMAT_NCHW
,
ge_data_type
);
// Setting desc name
conv2d_input_desc_x
.
SetName
(
"conv2d_input_desc_x"
);
conv2d_input_desc_filter
.
SetName
(
"conv2d_input_desc_filter"
);
conv2d_input_desc_bias
.
SetName
(
"conv2d_input_desc_bias"
);
conv2d_output_desc_y
.
SetName
(
"conv2d_output_desc_y"
);
// Conv node
std
::
shared_ptr
<
Node
>
conv_node
=
nullptr
;
if
(
use_depthwise_conv
&&
is_depthwise_mode
)
{
...
...
@@ -177,12 +178,19 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
conv_op
->
set_attr_data_format
(
"NCHW"
);
if
(
bias_node
!=
nullptr
&&
is_channel_bias
)
{
conv_op
->
set_input_bias
(
*
bias_node
->
data
());
conv_op
->
update_input_desc_bias
(
conv2d_input_desc_bias
);
TENSOR_UPDATE_INPUT
(
conv_op
,
bias
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
bias_node
->
precision
()));
}
// update tensor desc to conv2d
conv_op
->
update_input_desc_x
(
conv2d_input_desc_x
);
conv_op
->
update_input_desc_filter
(
conv2d_input_desc_filter
);
conv_op
->
update_output_desc_y
(
conv2d_output_desc_y
);
TENSOR_UPDATE_INPUT
(
conv_op
,
x
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
input_node
->
precision
()));
TENSOR_UPDATE_INPUT
(
conv_op
,
filter
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
filter_node
->
precision
()));
TENSOR_UPDATE_OUTPUT
(
conv_op
,
y
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
conv_node
->
precision
()));
}
else
{
conv_node
=
graph
->
Add
<
ge
::
op
::
Conv2D
>
(
output_name
);
auto
conv_op
=
conv_node
->
data
<
ge
::
op
::
Conv2D
>
();
...
...
@@ -198,12 +206,19 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
conv_op
->
set_attr_data_format
(
"NCHW"
);
if
(
bias_node
!=
nullptr
&&
is_channel_bias
)
{
conv_op
->
set_input_bias
(
*
bias_node
->
data
());
conv_op
->
update_input_desc_bias
(
conv2d_input_desc_bias
);
TENSOR_UPDATE_INPUT
(
conv_op
,
bias
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
bias_node
->
precision
()));
}
// update tensor desc to conv2d
conv_op
->
update_input_desc_x
(
conv2d_input_desc_x
);
conv_op
->
update_input_desc_filter
(
conv2d_input_desc_filter
);
conv_op
->
update_output_desc_y
(
conv2d_output_desc_y
);
TENSOR_UPDATE_INPUT
(
conv_op
,
x
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
input_node
->
precision
()));
TENSOR_UPDATE_INPUT
(
conv_op
,
filter
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
filter_node
->
precision
()));
TENSOR_UPDATE_OUTPUT
(
conv_op
,
y
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
conv_node
->
precision
()));
}
// append Add node to support bias
if
(
bias_node
!=
nullptr
&&
!
is_channel_bias
)
{
...
...
lite/kernels/huawei_ascend_npu/bridges/interpolate_op.cc
浏览文件 @
71d9dbd1
...
...
@@ -53,9 +53,6 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
return
FAILED
;
}
// get ge date type
ge
::
DataType
ge_data_type
=
CvtPrecisionType
(
x
->
precision
());
// X node
std
::
shared_ptr
<
Node
>
x_node
=
nullptr
;
if
(
graph
->
Has
(
x_name
))
{
...
...
@@ -100,10 +97,18 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bilinear_interp_op
->
set_input_x
(
*
x_node
->
data
());
bilinear_interp_op
->
set_input_size
(
*
out_size_node
->
data
());
bilinear_interp_op
->
set_attr_align_corners
(
align_corners
);
TENSOR_UPDATE_INPUT
(
bilinear_interp_op
,
x
,
ge
::
FORMAT_NCHW
,
ge_data_type
);
TENSOR_UPDATE_INPUT
(
bilinear_interp_op
,
size
,
ge
::
FORMAT_NCHW
,
ge_data_type
);
TENSOR_UPDATE_OUTPUT
(
bilinear_interp_op
,
y
,
ge
::
FORMAT_NCHW
,
ge_data_type
);
TENSOR_UPDATE_INPUT
(
bilinear_interp_op
,
x
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
x_node
->
precision
()));
TENSOR_UPDATE_INPUT
(
bilinear_interp_op
,
size
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
out_size_node
->
precision
()));
TENSOR_UPDATE_OUTPUT
(
bilinear_interp_op
,
y
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
bilinear_interp_node
->
precision
()));
}
else
if
(
interp_method
==
"nearest"
)
{
auto
nearest_interp_node
=
graph
->
Add
<
ge
::
op
::
ResizeNearestNeighborV2
>
(
out_name
);
...
...
@@ -112,9 +117,18 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
nearest_interp_op
->
set_input_x
(
*
x_node
->
data
());
nearest_interp_op
->
set_input_size
(
*
out_size_node
->
data
());
nearest_interp_op
->
set_attr_align_corners
(
align_corners
);
TENSOR_UPDATE_INPUT
(
nearest_interp_op
,
x
,
ge
::
FORMAT_NCHW
,
ge_data_type
);
TENSOR_UPDATE_INPUT
(
nearest_interp_op
,
size
,
ge
::
FORMAT_NCHW
,
ge_data_type
);
TENSOR_UPDATE_OUTPUT
(
nearest_interp_op
,
y
,
ge
::
FORMAT_NCHW
,
ge_data_type
);
TENSOR_UPDATE_INPUT
(
nearest_interp_op
,
x
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
x_node
->
precision
()));
TENSOR_UPDATE_INPUT
(
nearest_interp_op
,
size
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
out_size_node
->
precision
()));
TENSOR_UPDATE_OUTPUT
(
nearest_interp_op
,
y
,
ge
::
FORMAT_NCHW
,
CvtPrecisionType
(
nearest_interp_node
->
precision
()));
}
else
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] Unsupported interpolate method: "
<<
interp_method
;
...
...
lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h
浏览文件 @
71d9dbd1
...
...
@@ -27,3 +27,4 @@ USE_SUBGRAPH_BRIDGE(conv2d, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE
(
depthwise_conv2d
,
kHuaweiAscendNPU
);
USE_SUBGRAPH_BRIDGE
(
bilinear_interp
,
kHuaweiAscendNPU
);
USE_SUBGRAPH_BRIDGE
(
nearest_interp
,
kHuaweiAscendNPU
);
USE_SUBGRAPH_BRIDGE
(
concat
,
kHuaweiAscendNPU
);
lite/kernels/huawei_ascend_npu/bridges/utility.cc
浏览文件 @
71d9dbd1
...
...
@@ -156,61 +156,6 @@ int CvtActMode(std::string act_type) {
return
act_mode
;
}
// Returns a human-readable "NAME = value" label for a ge::Format enumerator.
//
// Entry i of the table describes the enumerator whose integer value is i.
// Any value that does not index into the table (too large, or negative) trips
// the CHECK instead of reading out of bounds. The returned reference refers to
// a function-local static table, so callers may keep it without copying.
const std::string& CvtFormat(ge::Format format) {
  static const std::string format2string[] = {
      "FORMAT_NCHW = 0",
      "FORMAT_NHWC = 1",
      "FORMAT_ND = 2",
      "FORMAT_NC1HWC0 = 3",
      "FORMAT_FRACTAL_Z = 4",
      "FORMAT_NC1C0HWPAD = 5",
      "FORMAT_NHWC1C0 = 6",
      "FORMAT_FSR_NCHW = 7",
      "FORMAT_FRACTAL_DECONV = 8",
      "FORMAT_C1HWNC0 = 9",
      "FORMAT_FRACTAL_DECONV_TRANSPOSE = 10",
      "FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11",
      "FORMAT_NC1HWC0_C04 = 12",
      "FORMAT_FRACTAL_Z_C04 = 13",
      "FORMAT_CHWN = 14",
      "FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15",
      "FORMAT_HWCN = 16",
      "FORMAT_NC1KHKWHWC0 = 17",
      "FORMAT_BN_WEIGHT = 18",
      "FORMAT_FILTER_HWCK = 19",
      "FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20",
      "FORMAT_HASHTABLE_LOOKUP_KEYS = 21",
      "FORMAT_HASHTABLE_LOOKUP_VALUE = 22",
      "FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23",
      "FORMAT_HASHTABLE_LOOKUP_HITS = 24"};
  // Derive the bound from the table itself so the two cannot drift apart.
  static const size_t MAX_FORMAT_LENGTH =
      sizeof(format2string) / sizeof(format2string[0]);
  // Going through an unsigned index makes a negative enumerator value wrap to
  // a huge number, so the single upper-bound check rejects it as well.
  const size_t x = static_cast<size_t>(static_cast<int>(format));
  CHECK_LT(x, MAX_FORMAT_LENGTH);
  return format2string[x];
}
// Returns a human-readable "NAME=value" label for a ge::DataType enumerator.
//
// Entry i of the table describes the enumerator whose integer value is i
// (value 5 has no named entry and is reported as "Unknown=5"). Any value that
// does not index into the table (too large, or negative) trips the CHECK
// instead of reading out of bounds. The returned reference refers to a
// function-local static table, so callers may keep it without copying.
const std::string& CvtDataType(ge::DataType data_type) {
  static const std::string datatype2string[] = {
      "DT_FLOAT=0",
      "DT_FLOAT16=1",
      "DT_INT8=2",
      "DT_INT32=3",
      "DT_UINT8=4",
      "Unknown=5",
      "DT_INT16=6",
      "DT_UINT16=7",
      "DT_UINT32=8",
      "DT_INT64=9",
      "DT_UINT64=10",
      "DT_DOUBLE=11",
      "DT_BOOL=12",
      "DT_STRING=13"};
  // Derive the bound from the table itself so the two cannot drift apart.
  static const size_t MAX_DATATYPE_LENGTH =
      sizeof(datatype2string) / sizeof(datatype2string[0]);
  // Going through an unsigned index makes a negative enumerator value wrap to
  // a huge number, so the single upper-bound check rejects it as well.
  const size_t x = static_cast<size_t>(static_cast<int>(data_type));
  CHECK_LT(x, MAX_DATATYPE_LENGTH);
  return datatype2string[x];
}
}
// namespace huawei_ascend_npu
}
// namespace subgraph
}
// namespace lite
...
...
lite/kernels/huawei_ascend_npu/bridges/utility.h
浏览文件 @
71d9dbd1
...
...
@@ -36,6 +36,10 @@ namespace huawei_ascend_npu {
// Declares a local ge::TensorDesc named _<op>_output_desc_<attr>, built from
// an empty ge::Shape() plus the given format and dtype, and passes it to
// op->update_output_desc_<attr>().
//
// `op` and `attr` are token-pasted into identifiers, so `op` must be a plain
// variable name and `attr` the operator's output name. Because a named local
// is declared, the macro can be used only once per (op, attr) pair in a scope.
// It expands to two statements: do not use it as the body of an unbraced
// if/for/while.
#define TENSOR_UPDATE_OUTPUT(op, attr, format, dtype) \
ge::TensorDesc _##op##_output_desc_##attr(ge::Shape(), format, dtype); \
op->update_output_desc_##attr(_##op##_output_desc_##attr);
// Declares a local ge::TensorDesc named _<op>_input_desc_<attr>_<idx>, built
// from an empty ge::Shape() plus the given format and dtype, and passes it to
// op->update_dynamic_input_desc_<attr>(idx, desc).
//
// `op`, `attr` and `idx` are all token-pasted into the variable name, so
// `idx` must be a single token that yields a valid identifier (an integer
// literal or a variable name), not an expression such as `i + 1`. Because a
// named local is declared, each (op, attr, idx) spelling can be used only once
// per scope. It expands to two statements: do not use it as the body of an
// unbraced if/for/while.
#define TENSOR_UPDATE_DYNAMIC_INPUT(op, attr, idx, format, dtype) \
ge::TensorDesc _##op##_input_desc_##attr##_##idx( \
ge::Shape(), format, dtype); \
op->update_dynamic_input_desc_##attr(idx, _##op##_input_desc_##attr##_##idx);
// Type/tensor converters for converting Paddle type/tensor to HiAI type/tensor
bool
HasInputArg
(
const
OpInfo
*
op_info
,
...
...
@@ -57,9 +61,6 @@ ge::Tensor CvtTensor(const Tensor& in_tensor,
int
CvtActMode
(
std
::
string
act_type
);
const
std
::
string
&
CvtFormat
(
ge
::
Format
format
);
const
std
::
string
&
CvtDataType
(
ge
::
DataType
data_type
);
}
// namespace huawei_ascend_npu
}
// namespace subgraph
}
// namespace lite
...
...
lite/kernels/huawei_ascend_npu/subgraph_compute.cc
浏览文件 @
71d9dbd1
...
...
@@ -241,32 +241,18 @@ bool DeviceProgram::ShareBufferWithOriginTensors(
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Inputs["
<<
i
<<
"] name: "
<<
input_names
[
i
]
<<
" origin dims:"
<<
(
*
origin_itensors
)[
i
]
->
dims
().
repr
()
<<
" device dims: {"
<<
device_idims_
[
i
].
GetNumber
()
<<
","
<<
device_idims_
[
i
].
GetChannel
()
<<
","
<<
device_idims_
[
i
].
GetHeight
()
<<
","
<<
device_idims_
[
i
].
GetWidth
()
<<
"}"
;
<<
" device dims:"
<<
device_idims_
[
i
].
repr
();
CHECK_EQ
((
*
origin_itensors
)[
i
]
->
dims
().
production
(),
device_idims_
[
i
].
GetNumber
()
*
device_idims_
[
i
].
GetChannel
()
*
device_idims_
[
i
].
GetHeight
()
*
device_idims_
[
i
].
GetWidth
());
device_idims_
[
i
].
production
());
// reset tensor desc
if
((
*
device_itensors
)[
i
]
->
SetTensorDesc
(
device_idims_
[
i
].
GetGeTensorDesc
())
!=
ge
::
GRAPH_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] ge::Tensor input tensor "
"SetTensorDesc failed!"
;
}
else
{
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetTensorDesc "
"success."
;
}
ATC_CALL
((
*
device_itensors
)[
i
]
->
SetTensorDesc
(
device_idims_
[
i
].
GetGeTensorDesc
()));
// copy data from origin to device
if
((
*
device_itensors
)[
i
]
->
SetData
(
reinterpret_cast
<
uint8_t
*>
((
*
origin_itensors
)[
i
]
->
raw_data
()),
(
*
origin_itensors
)[
i
]
->
memory_size
())
!=
ge
::
GRAPH_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetData failed!"
;
}
else
{
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] ge::Tensor input tensor SetData success."
;
}
ATC_CALL
((
*
device_itensors
)[
i
]
->
SetData
(
reinterpret_cast
<
uint8_t
*>
((
*
origin_itensors
)[
i
]
->
raw_data
()),
(
*
origin_itensors
)[
i
]
->
memory_size
()));
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Init the input tensors for the device program "
"and share their buffers with the origin input tensors"
;
...
...
@@ -285,26 +271,13 @@ bool DeviceProgram::ShareBufferWithOriginTensors(
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Outputs["
<<
i
<<
"] name: "
<<
output_names
[
i
]
<<
" origin dims:"
<<
(
*
origin_otensors
)[
i
]
->
dims
().
repr
()
<<
" device dims: {"
<<
device_odims_
[
i
].
GetNumber
()
<<
","
<<
device_odims_
[
i
].
GetChannel
()
<<
","
<<
device_odims_
[
i
].
GetHeight
()
<<
","
<<
device_odims_
[
i
].
GetWidth
()
<<
"}"
;
<<
" device dims:"
<<
device_odims_
[
i
].
repr
();
CHECK_EQ
((
*
origin_otensors
)[
i
]
->
dims
().
production
(),
device_odims_
[
i
].
GetNumber
()
*
device_odims_
[
i
].
GetChannel
()
*
device_odims_
[
i
].
GetHeight
()
*
device_odims_
[
i
].
GetWidth
());
device_odims_
[
i
].
production
());
// reset tensor desc
if
((
*
device_otensors
)[
i
]
->
SetTensorDesc
(
device_odims_
[
i
].
GetGeTensorDesc
())
!=
ge
::
GRAPH_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[HUAWEI_ASCEND_NPU] ge::Tensor output tensor "
"SetTensorDesc failed!"
;
}
else
{
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] ge::Tensor output tensor SetTensorDesc "
"success."
;
}
VLOG
(
3
)
<<
"[HUAWEI_ASCEND_NPU] Init the output tensors for the device program "
"and share their buffers with the origin output tensors"
;
ATC_CALL
((
*
device_otensors
)[
i
]
->
SetTensorDesc
(
device_odims_
[
i
].
GetGeTensorDesc
()));
}
return
true
;
}
...
...
@@ -321,8 +294,7 @@ bool DeviceProgram::SharedBufferWithOutputTensors(
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
i
++
)
{
CHECK_EQ
((
*
origin_otensors
)[
i
]
->
dims
().
production
(),
device_odims_
[
i
].
GetNumber
()
*
device_odims_
[
i
].
GetChannel
()
*
device_odims_
[
i
].
GetHeight
()
*
device_odims_
[
i
].
GetWidth
());
device_odims_
[
i
].
production
());
// Share data buf between device_itensor and origin_itensor
std
::
shared_ptr
<
Buffer
>
buffer
=
std
::
make_shared
<
Buffer
>
(
...
...
lite/tests/kernels/concat_compute_test.cc
浏览文件 @
71d9dbd1
...
...
@@ -147,6 +147,8 @@ TEST(Concat, precision) {
#if defined(LITE_WITH_NPU)
place
=
TARGET
(
kNPU
);
abs_error
=
1e-2
;
// use fp16 in npu
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place
=
TARGET
(
kHuaweiAscendNPU
);
#elif defined(LITE_WITH_ARM)
place
=
TARGET
(
kARM
);
#elif defined(LITE_WITH_X86)
...
...
@@ -157,6 +159,10 @@ TEST(Concat, precision) {
for
(
int
axis
:
{
1
,
2
})
{
for
(
bool
is_use_axis_tensor
:
{
false
,
true
})
{
// is_use_axis_tensor = true has bugs in Huawei Ascend NPU DDK
if
(
place
==
TARGET
(
kHuaweiAscendNPU
)
&&
is_use_axis_tensor
)
{
continue
;
}
LOG
(
INFO
)
<<
"axis:"
<<
axis
<<
", is_use_axis_tensor:"
<<
is_use_axis_tensor
;
std
::
unique_ptr
<
arena
::
TestCase
>
tester
(
...
...
lite/tests/kernels/conv_compute_test.cc
浏览文件 @
71d9dbd1
...
...
@@ -296,6 +296,11 @@ void TestConvStrides(Place place, float abs_error = 2e-5) {
for
(
auto
out_channels
:
{
1
,
3
})
{
for
(
auto
strides
:
std
::
vector
<
std
::
vector
<
int
>>
{{
2
,
2
},
{
3
,
3
},
{
1
,
2
},
{
3
,
1
}})
{
// Check Huawei Ascend NPU restriction if output HxW = 1x1
// input_w after padding = 4 should be equal to filter_w after dilation = 3
if
(
place
==
TARGET
(
kHuaweiAscendNPU
)
&&
dims
[
3
]
==
4
)
{
continue
;
}
std
::
unique_ptr
<
arena
::
TestCase
>
tester
(
new
ConvComputeTester
(
place
,
"def"
,
DDim
(
dims
),
out_channels
,
3
,
strides
));
arena
::
Arena
arena
(
std
::
move
(
tester
),
place
,
abs_error
);
...
...
@@ -415,13 +420,16 @@ TEST(Conv2d, precision) {
abs_error
=
5e-2
;
// Using fp16 in NPU
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place
=
TARGET
(
kHuaweiAscendNPU
);
abs_error
=
5
e-2
;
// Using fp16 in NPU
abs_error
=
1
e-2
;
// Using fp16 in NPU
#else
return
;
#endif
TestConvKsize
(
place
,
abs_error
);
// Huawei Ascend NPU DDK does not support groups > 1
#if !defined(LITE_WITH_HUAWEI_ASCEND_NPU)
TestConvGroups
(
place
,
abs_error
);
#endif
TestConvDilations
(
place
,
abs_error
);
TestConvStrides
(
place
,
abs_error
);
TestConvPaddings
(
place
,
abs_error
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录