Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OAID
Tengine
提交
eaa1cb37
T
Tengine
项目概览
OAID
/
Tengine
10 个月 前同步成功
通知
53
Star
4429
Fork
1032
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Tengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
eaa1cb37
编写于
6月 01, 2021
作者:
B
BUG1989
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add yolov4-tiny timvx example
上级
e6152e2a
变更
10
展开全部
隐藏空白更改
内联
并排
Showing
10 changed file
with
930 addition
and
766 deletion
+930
-766
examples/CMakeLists.txt
examples/CMakeLists.txt
+2
-1
examples/tm_yolov4_tiny.cpp
examples/tm_yolov4_tiny.cpp
+2
-2
examples/tm_yolov4_tiny_timvx.cpp
examples/tm_yolov4_tiny_timvx.cpp
+287
-725
examples/tm_yolov4_tiny_uint8.cpp
examples/tm_yolov4_tiny_uint8.cpp
+540
-0
source/device/cpu/op/slice/slice_ref.c
source/device/cpu/op/slice/slice_ref.c
+45
-33
source/device/tim-vx/op/timvx_mish.cc
source/device/tim-vx/op/timvx_mish.cc
+46
-0
source/device/tim-vx/op/timvx_slice.cc
source/device/tim-vx/op/timvx_slice.cc
+3
-4
source/device/tim-vx/timvx_executor.cc
source/device/tim-vx/timvx_executor.cc
+3
-0
source/device/tim-vx/timvx_executor.hpp
source/device/tim-vx/timvx_executor.hpp
+1
-0
source/device/tim-vx/timvx_limit.hpp
source/device/tim-vx/timvx_limit.hpp
+1
-1
未找到文件。
examples/CMakeLists.txt
浏览文件 @
eaa1cb37
...
...
@@ -86,7 +86,7 @@ ENDIF()
# set(OpenCV_DIR /mnt/d/ubuntu/opencv_install/linux-armv7/lib/cmake/opencv4)
FIND_PACKAGE
(
OpenCV QUIET
)
IF
(
OpenCV_FOUND
AND
${
TENGINE_TARGET_PROCESSOR
}
MATCHES
"X86"
)
IF
(
OpenCV_FOUND
)
# macro for adding examples
FUNCTION
(
TENGINE_EXAMPLE_CV name file
)
ADD_EXECUTABLE
(
${
name
}
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
file
}
"
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/common/tengine_operations.c"
)
...
...
@@ -104,6 +104,7 @@ IF (OpenCV_FOUND AND ${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
TENGINE_EXAMPLE_CV
(
tm_crnn tm_crnn.cpp
)
TENGINE_EXAMPLE_CV
(
tm_alphapose tm_alphapose.cpp
)
TENGINE_EXAMPLE_CV
(
tm_yolov4_tiny tm_yolov4_tiny.cpp
)
TENGINE_EXAMPLE_CV
(
tm_yolov4_tiny_uint8 tm_yolov4_tiny_uint8.cpp
)
TENGINE_EXAMPLE_CV
(
tm_yolov4_tiny_timvx tm_yolov4_tiny_timvx.cpp
)
TENGINE_EXAMPLE_CV
(
tm_yolov5s tm_yolov5s.cpp
)
TENGINE_EXAMPLE_CV
(
tm_yolov5s_timvx tm_yolov5s_timvx.cpp
)
...
...
examples/tm_yolov4_tiny.cpp
浏览文件 @
eaa1cb37
...
...
@@ -266,7 +266,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
sprintf
(
text
,
"%s %.1f%%"
,
class_names
[
obj
.
label
],
obj
.
prob
*
100
);
int
baseLine
=
0
;
cv
::
Size
label_size
=
cv
::
getTextSize
(
text
,
cv
::
FONT_HERSHEY_SIMPLEX
,
0.5
,
1
,
&
baseLine
);
cv
::
Size
label_size
=
cv
::
getTextSize
(
text
,
cv
::
FONT_HERSHEY_SIMPLEX
,
1
,
2
,
&
baseLine
);
int
x
=
obj
.
rect
.
x
;
int
y
=
obj
.
rect
.
y
-
label_size
.
height
-
baseLine
;
...
...
@@ -278,7 +278,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
cv
::
rectangle
(
image
,
cv
::
Rect
(
cv
::
Point
(
x
,
y
),
cv
::
Size
(
label_size
.
width
,
label_size
.
height
+
baseLine
)),
cv
::
Scalar
(
255
,
255
,
255
),
-
1
);
cv
::
putText
(
image
,
text
,
cv
::
Point
(
x
,
y
+
label_size
.
height
),
cv
::
FONT_HERSHEY_SIMPLEX
,
0.5
,
cv
::
putText
(
image
,
text
,
cv
::
Point
(
x
,
y
+
label_size
.
height
),
cv
::
FONT_HERSHEY_SIMPLEX
,
1
,
cv
::
Scalar
(
0
,
0
,
0
));
}
...
...
examples/tm_yolov4_tiny_timvx.cpp
浏览文件 @
eaa1cb37
此差异已折叠。
点击以展开。
examples/tm_yolov4_tiny_uint8.cpp
0 → 100644
浏览文件 @
eaa1cb37
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: 942002795@qq.com
* Update: xwwang@openailab.com
*/
#include <math.h>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <stdlib.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "common.h"
#include "tengine/c_api.h"
#include "tengine_operations.h"
/* One detection produced by the YOLOv4-tiny post-processing in this file. */
struct Object
{
    cv::Rect_<float> rect; /* bounding box as x, y, width, height (float) */
    int label;             /* class index; draw_objects() uses it to index class_names[] */
    float prob;            /* score; generate_proposals() stores sigmoid(box) * sigmoid(class) here */
};
/*
 * Logistic function: 1 / (1 + e^-x).
 *
 * x: raw (pre-activation) value.
 * Returns the activation as a float.
 */
static inline float sigmoid(float x)
{
    /* keep whatever type exp() yields so the final rounding matches a single-expression form */
    const auto denominator = 1.f + exp(-x);
    return static_cast<float>(1.f / denominator);
}
static
inline
float
intersection_area
(
const
Object
&
a
,
const
Object
&
b
)
{
cv
::
Rect_
<
float
>
inter
=
a
.
rect
&
b
.
rect
;
return
inter
.
area
();
}
/*
 * In-place quicksort of faceobjects[left..right] (both inclusive) by
 * descending `prob`.
 *
 * The pivot is the score of the middle element. After partitioning, the two
 * remaining sub-ranges are sorted recursively, each in its own OpenMP section.
 */
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        /* advance past elements that are already on the correct side of the pivot */
        for (; faceobjects[lo].prob > pivot; ++lo) {}
        for (; faceobjects[hi].prob < pivot; --hi) {}

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        ++lo;
        --hi;
    }

#pragma omp parallel sections
    {
#pragma omp section
        {
            if (left < hi)
                qsort_descent_inplace(faceobjects, left, hi);
        }
#pragma omp section
        {
            if (lo < right)
                qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}
static
void
qsort_descent_inplace
(
std
::
vector
<
Object
>&
faceobjects
)
{
if
(
faceobjects
.
empty
())
return
;
qsort_descent_inplace
(
faceobjects
,
0
,
faceobjects
.
size
()
-
1
);
}
/*
 * Greedy non-maximum suppression.
 *
 * Boxes are visited in index order (main() sorts them by descending score
 * first). A box is kept unless its intersection-over-union with an already
 * kept box is greater than `nms_threshold`.
 *
 * faceobjects:   candidate detections.
 * picked:        output; cleared, then filled with the indices of kept boxes.
 * nms_threshold: IoU above which a box is suppressed.
 */
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = static_cast<int>(faceobjects.size());

    /* box areas are reused for every pair, so compute them once */
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        bool keep = true;
        for (size_t j = 0; j < picked.size(); j++)
        {
            const int kept_index = picked[j];
            const Object& b = faceobjects[kept_index];

            /* intersection over union */
            const float inter_area = intersection_area(a, b);
            const float union_area = areas[i] + areas[kept_index] - inter_area;
            if (inter_area / union_area > nms_threshold)
            {
                keep = false;
                /* one overlapping kept box is enough; the remaining comparisons cannot change the outcome */
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
/*
 * Load an image and fill `input_data` with a quantized, planar (channel-major)
 * uint8 tensor.
 *
 * image_file:  path passed to cv::imread.
 * input_data:  output buffer; 3 * img_h * img_w bytes are written.
 * img_h/img_w: size the image is resized to.
 * mean, scale: per-channel values; each pixel becomes (pixel - mean[c]) * scale[c].
 * input_scale, zero_point: quantization parameters; the normalized value is
 *              divided by input_scale, offset by zero_point, rounded and
 *              clamped to [0, 255].
 */
void get_input_data_yolov4_uint8(const char* image_file, uint8_t* input_data, int img_h, int img_w, const float* mean,
                                 const float* scale, float input_scale, int zero_point)
{
    cv::Mat sample = cv::imread(image_file, 1);
    cv::Mat img;

    /* convert to RGB channel order */
    const int color_code = (sample.channels() == 1) ? cv::COLOR_GRAY2RGB : cv::COLOR_BGR2RGB;
    cv::cvtColor(sample, img, color_code);

    /* resize, then switch to float pixels */
    cv::resize(img, img, cv::Size(img_w, img_h));
    img.convertTo(img, CV_32FC3);
    const float* pixels = (float*)img.data;

    /* interleaved (h, w, c) source -> planar (c, h, w) destination */
    const int plane = img_h * img_w;
    for (int row = 0; row < img_h; ++row)
    {
        for (int col = 0; col < img_w; ++col)
        {
            const int pixel = row * img_w + col;
            for (int ch = 0; ch < 3; ++ch)
            {
                const float normalized = (pixels[pixel * 3 + ch] - mean[ch]) * scale[ch];

                /* quantize to uint8 with saturation */
                int quantized = (round)(normalized / input_scale + (float)zero_point);
                quantized = std::max(0, std::min(255, quantized));

                input_data[ch * plane + pixel] = quantized;
            }
        }
    }
}
/*
 * Decode one YOLOv4-tiny output feature map into candidate detections.
 *
 * stride:         down-sampling factor of the feature map; only 16 and 32 are
 *                 supported (the map is (416 / stride) cells on each side).
 * feat:           dequantized output, indexed as
 *                 [anchor][85 values][feat_h][feat_w], where the 85 values per
 *                 anchor are dx, dy, dw, dh, box score and 80 class scores.
 * prob_threshold: minimum sigmoid(box) * sigmoid(best class) to keep a box.
 * objects:        output; accepted detections are appended.
 *
 * An unsupported stride is reported on stderr and nothing is appended; without
 * that guard the anchor lookup would read before the start of anchors[].
 */
static void generate_proposals(int stride, const float* feat, float prob_threshold, std::vector<Object>& objects)
{
    static const float anchors[12] = {10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};

    if (stride != 16 && stride != 32)
    {
        fprintf(stderr, "generate_proposals: unsupported stride %d\n", stride);
        return;
    }

    const int anchor_num = 3;
    const int feat_w = 416 / stride;
    const int feat_h = 416 / stride;
    const int cls_num = 80;
    const int channel_size = feat_h * feat_w;

    /* stride 16 uses the first six anchor values, stride 32 the last six */
    const int anchor_group = (stride == 32) ? 2 : 1;
    const float* group_anchors = anchors + (anchor_group - 1) * 6;

    for (int h = 0; h < feat_h; h++)
    {
        for (int w = 0; w < feat_w; w++)
        {
            for (int anchor = 0; anchor < anchor_num; anchor++)
            {
                /* offset of value 0 (dx) for this anchor and cell; value k lives at base + k * channel_size */
                const int base = anchor * 85 * channel_size + feat_w * h + w;

                /* pick the class with the highest raw score */
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int s = 0; s < cls_num; s++)
                {
                    const float score = feat[base + (s + 5) * channel_size];
                    if (score > class_score)
                    {
                        class_index = s;
                        class_score = score;
                    }
                }

                const float box_score = feat[base + 4 * channel_size];
                const float final_score = sigmoid(box_score) * sigmoid(class_score);
                if (!(final_score >= prob_threshold))
                    continue;

                const float dx = sigmoid(feat[base + 0 * channel_size]);
                const float dy = sigmoid(feat[base + 1 * channel_size]);
                const float dw = feat[base + 2 * channel_size];
                const float dh = feat[base + 3 * channel_size];

                const float anchor_w = group_anchors[anchor * 2 + 0];
                const float anchor_h = group_anchors[anchor * 2 + 1];

                /* box centre and size in 416x416 network-input pixels */
                const float pred_x = (w + dx) * stride;
                const float pred_y = (h + dy) * stride;
                const float pred_w = exp(dw) * anchor_w;
                const float pred_h = exp(dh) * anchor_h;

                const float x0 = (pred_x - pred_w * 0.5f);
                const float y0 = (pred_y - pred_h * 0.5f);
                const float x1 = (pred_x + pred_w * 0.5f);
                const float y1 = (pred_y + pred_h * 0.5f);

                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0;
                obj.rect.height = y1 - y0;
                obj.label = class_index;
                obj.prob = final_score;
                objects.push_back(obj);
            }
        }
    }
}
/*
 * Print every detection to stderr, draw its box and a "<class> <score>%" label
 * on a copy of `bgr`, and write the result to "yolov4_tiny_out.jpg".
 *
 * bgr:     source image; it is cloned, not modified.
 * objects: detections to draw; `label` indexes the class name table below.
 */
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"};

    cv::Mat image = bgr.clone();

    for (const Object& obj : objects)
    {
        const char* name = class_names[obj.label];
        const float percent = obj.prob * 100;

        fprintf(stderr, "%2d: %3.0f%%, [%4.0f, %4.0f, %4.0f, %4.0f], %s\n", obj.label, percent, obj.rect.x,
                obj.rect.y, obj.rect.x + obj.rect.width, obj.rect.y + obj.rect.height, name);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        char text[256];
        sprintf(text, "%s %.1f%%", name, percent);

        int baseLine = 0;
        const cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 1, 2, &baseLine);

        /* put the label just above the box, but keep it inside the top and right image edges */
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        y = std::max(y, 0);
        x = std::min(x, image.cols - label_size.width);

        /* white filled background, then black text on top of it */
        const cv::Rect label_box(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine));
        cv::rectangle(image, label_box, cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 1,
                    cv::Scalar(0, 0, 0));
    }

    cv::imwrite("yolov4_tiny_out.jpg", image);
}
/* Print the command-line synopsis to stderr. */
void show_usage()
{
    static const char usage[] =
        "[Usage]: [-h]\n"
        "[-m model_file] [-i image_file] [-r repeat_count] [-t thread_count]\n";

    fputs(usage, stderr);
}
int
main
(
int
argc
,
char
*
argv
[])
{
const
char
*
model_file
=
nullptr
;
const
char
*
image_file
=
nullptr
;
int
img_h
=
416
;
int
img_w
=
416
;
int
img_c
=
3
;
const
float
mean
[
3
]
=
{
0
,
0
,
0
};
const
float
scale
[
3
]
=
{
0.003921
,
0.003921
,
0.003921
};
int
repeat_count
=
1
;
int
num_thread
=
1
;
int
res
;
while
((
res
=
getopt
(
argc
,
argv
,
"m:i:r:t:h:"
))
!=
-
1
)
{
switch
(
res
)
{
case
'm'
:
model_file
=
optarg
;
break
;
case
'i'
:
image_file
=
optarg
;
break
;
case
'r'
:
repeat_count
=
std
::
strtoul
(
optarg
,
nullptr
,
10
);
break
;
case
't'
:
num_thread
=
std
::
strtoul
(
optarg
,
nullptr
,
10
);
break
;
case
'h'
:
show_usage
();
return
0
;
default:
break
;
}
}
/* check files */
if
(
nullptr
==
model_file
)
{
fprintf
(
stderr
,
"Error: Tengine model file not specified!
\n
"
);
show_usage
();
return
-
1
;
}
if
(
nullptr
==
image_file
)
{
fprintf
(
stderr
,
"Error: Image file not specified!
\n
"
);
show_usage
();
return
-
1
;
}
if
(
!
check_file_exist
(
model_file
)
||
!
check_file_exist
(
image_file
))
return
-
1
;
cv
::
Mat
img
=
cv
::
imread
(
image_file
,
1
);
if
(
img
.
empty
())
{
fprintf
(
stderr
,
"cv::imread %s failed
\n
"
,
image_file
);
return
-
1
;
}
/* set runtime options */
struct
options
opt
;
opt
.
num_thread
=
num_thread
;
opt
.
cluster
=
TENGINE_CLUSTER_ALL
;
opt
.
precision
=
TENGINE_MODE_UINT8
;
opt
.
affinity
=
0
;
/* inital tengine */
if
(
init_tengine
()
!=
0
)
{
fprintf
(
stderr
,
"Initial tengine failed.
\n
"
);
return
-
1
;
}
fprintf
(
stderr
,
"tengine-lite library version: %s
\n
"
,
get_tengine_version
());
/* create graph, load tengine model xxx.tmfile */
graph_t
graph
=
create_graph
(
nullptr
,
"tengine"
,
model_file
);
if
(
graph
==
nullptr
)
{
fprintf
(
stderr
,
"Create graph failed.
\n
"
);
return
-
1
;
}
int
img_size
=
img_h
*
img_w
*
img_c
;
int
dims
[]
=
{
1
,
3
,
img_h
,
img_w
};
std
::
vector
<
uint8_t
>
input_data
(
img_size
);
tensor_t
input_tensor
=
get_graph_input_tensor
(
graph
,
0
,
0
);
if
(
input_tensor
==
nullptr
)
{
fprintf
(
stderr
,
"Get input tensor failed
\n
"
);
return
-
1
;
}
if
(
set_tensor_shape
(
input_tensor
,
dims
,
4
)
<
0
)
{
fprintf
(
stderr
,
"Set input tensor shape failed
\n
"
);
return
-
1
;
}
if
(
set_tensor_buffer
(
input_tensor
,
input_data
.
data
(),
img_size
)
<
0
)
{
fprintf
(
stderr
,
"Set input tensor buffer failed
\n
"
);
return
-
1
;
}
/* prerun graph, set work options(num_thread, cluster, precision) */
if
(
prerun_graph_multithread
(
graph
,
opt
)
<
0
)
{
fprintf
(
stderr
,
"Prerun multithread graph failed.
\n
"
);
return
-
1
;
}
/* prepare process input data, set the data mem to input tensor */
float
input_scale
=
0.
f
;
int
input_zero_point
=
0
;
get_tensor_quant_param
(
input_tensor
,
&
input_scale
,
&
input_zero_point
,
1
);
get_input_data_yolov4_uint8
(
image_file
,
input_data
.
data
(),
img_h
,
img_w
,
mean
,
scale
,
input_scale
,
input_zero_point
);
/* run graph */
double
min_time
=
DBL_MAX
;
double
max_time
=
DBL_MIN
;
double
total_time
=
0.
;
for
(
int
i
=
0
;
i
<
repeat_count
;
i
++
)
{
double
start
=
get_current_time
();
if
(
run_graph
(
graph
,
1
)
<
0
)
{
fprintf
(
stderr
,
"Run graph failed
\n
"
);
return
-
1
;
}
double
end
=
get_current_time
();
double
cur
=
end
-
start
;
total_time
+=
cur
;
min_time
=
std
::
min
(
min_time
,
cur
);
max_time
=
std
::
max
(
max_time
,
cur
);
}
fprintf
(
stderr
,
"Repeat %d times, thread %d, avg time %.2f ms, max_time %.2f ms, min_time %.2f ms
\n
"
,
repeat_count
,
num_thread
,
total_time
/
repeat_count
,
max_time
,
min_time
);
fprintf
(
stderr
,
"--------------------------------------
\n
"
);
/* dequant output data */
tensor_t
p16_output
=
get_graph_output_tensor
(
graph
,
1
,
0
);
tensor_t
p32_output
=
get_graph_output_tensor
(
graph
,
0
,
0
);
float
p16_scale
=
0.
f
;
float
p32_scale
=
0.
f
;
int
p16_zero_point
=
0
;
int
p32_zero_point
=
0
;
get_tensor_quant_param
(
p16_output
,
&
p16_scale
,
&
p16_zero_point
,
1
);
get_tensor_quant_param
(
p32_output
,
&
p32_scale
,
&
p32_zero_point
,
1
);
int
p16_count
=
get_tensor_buffer_size
(
p16_output
)
/
sizeof
(
uint8_t
);
int
p32_count
=
get_tensor_buffer_size
(
p32_output
)
/
sizeof
(
uint8_t
);
uint8_t
*
p16_data_u8
=
(
uint8_t
*
)
get_tensor_buffer
(
p16_output
);
uint8_t
*
p32_data_u8
=
(
uint8_t
*
)
get_tensor_buffer
(
p32_output
);
std
::
vector
<
float
>
p16_data
(
p16_count
);
std
::
vector
<
float
>
p32_data
(
p32_count
);
for
(
int
c
=
0
;
c
<
p16_count
;
c
++
)
{
p16_data
[
c
]
=
((
float
)
p16_data_u8
[
c
]
-
(
float
)
p16_zero_point
)
*
p16_scale
;
}
for
(
int
c
=
0
;
c
<
p32_count
;
c
++
)
{
p32_data
[
c
]
=
((
float
)
p32_data_u8
[
c
]
-
(
float
)
p32_zero_point
)
*
p32_scale
;
}
/* postprocess */
const
float
prob_threshold
=
0.45
f
;
const
float
nms_threshold
=
0.25
f
;
std
::
vector
<
Object
>
proposals
;
std
::
vector
<
Object
>
objects16
;
std
::
vector
<
Object
>
objects32
;
std
::
vector
<
Object
>
objects
;
generate_proposals
(
32
,
p32_data
.
data
(),
prob_threshold
,
objects32
);
proposals
.
insert
(
proposals
.
end
(),
objects32
.
begin
(),
objects32
.
end
());
generate_proposals
(
16
,
p16_data
.
data
(),
prob_threshold
,
objects16
);
proposals
.
insert
(
proposals
.
end
(),
objects16
.
begin
(),
objects16
.
end
());
qsort_descent_inplace
(
proposals
);
std
::
vector
<
int
>
picked
;
nms_sorted_bboxes
(
proposals
,
picked
,
nms_threshold
);
/* yolov4 tiny draw the result */
int
raw_h
=
img
.
rows
;
int
raw_w
=
img
.
cols
;
float
ratio_x
=
(
float
)
raw_w
/
img_w
;
float
ratio_y
=
(
float
)
raw_h
/
img_h
;
int
count
=
picked
.
size
();
fprintf
(
stderr
,
"detection num: %d
\n
"
,
count
);
objects
.
resize
(
count
);
for
(
int
i
=
0
;
i
<
count
;
i
++
)
{
objects
[
i
]
=
proposals
[
picked
[
i
]];
float
x0
=
(
objects
[
i
].
rect
.
x
);
float
y0
=
(
objects
[
i
].
rect
.
y
);
float
x1
=
(
objects
[
i
].
rect
.
x
+
objects
[
i
].
rect
.
width
);
float
y1
=
(
objects
[
i
].
rect
.
y
+
objects
[
i
].
rect
.
height
);
x0
=
x0
*
ratio_x
;
y0
=
y0
*
ratio_y
;
x1
=
x1
*
ratio_x
;
y1
=
y1
*
ratio_y
;
x0
=
std
::
max
(
std
::
min
(
x0
,
(
float
)(
raw_w
-
1
)),
0.
f
);
y0
=
std
::
max
(
std
::
min
(
y0
,
(
float
)(
raw_h
-
1
)),
0.
f
);
x1
=
std
::
max
(
std
::
min
(
x1
,
(
float
)(
raw_w
-
1
)),
0.
f
);
y1
=
std
::
max
(
std
::
min
(
y1
,
(
float
)(
raw_h
-
1
)),
0.
f
);
objects
[
i
].
rect
.
x
=
x0
;
objects
[
i
].
rect
.
y
=
y0
;
objects
[
i
].
rect
.
width
=
x1
-
x0
;
objects
[
i
].
rect
.
height
=
y1
-
y0
;
}
draw_objects
(
img
,
objects
);
/* release tengine */
postrun_graph
(
graph
);
destroy_graph
(
graph
);
release_tengine
();
}
source/device/cpu/op/slice/slice_ref.c
浏览文件 @
eaa1cb37
...
...
@@ -286,8 +286,7 @@ static int onnx_run(const int8_t* in_data, int8_t** out_data, int element_size,
for
(
int
j
=
start_2
;
j
<
stop_2
;
++
j
)
{
int
len
=
stop_3
-
start_3
;
int
input_off
=
n
*
in_dim_1
*
in_dim_2
*
in_dim_3
+
i
*
in_dim_2
*
in_dim_3
+
j
*
in_dim_3
+
start_3
;
int
input_off
=
n
*
in_dim_1
*
in_dim_2
*
in_dim_3
+
i
*
in_dim_2
*
in_dim_3
+
j
*
in_dim_3
+
start_3
;
memcpy
(
output
,
input
+
input_off
*
element_size
,
(
size_t
)
len
*
element_size
);
output
+=
len
*
element_size
;
}
...
...
@@ -374,7 +373,8 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
{
struct
node
*
ir_node
=
exec_node
->
ir_node
;
struct
graph
*
ir_graph
=
ir_node
->
graph
;
struct
tensor
*
input_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
input_tensors
[
0
]);
struct
tensor
*
input_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
input_tensors
[
0
]);
struct
tensor
*
output_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
output_tensors
[
0
]);
struct
slice_param_ref
op_param
;
slice_param_t
*
_param
=
(
struct
slice_param
*
)(
ir_node
->
op
.
param_mem
);
...
...
@@ -408,12 +408,11 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
// set the output
for
(
int
i
=
0
;
i
<
op_param
.
out_num
;
++
i
)
{
struct
tensor
*
out_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
output_tensors
[
i
]);
for
(
int
j
=
0
;
j
<
op_param
.
dim_num
;
++
j
)
{
op_param
.
output_shape
[
i
].
dims
[
j
]
=
out_tensor
->
dims
[
j
];
op_param
.
output_shape
[
i
].
dims
[
j
]
=
out
put
_tensor
->
dims
[
j
];
}
out_data_ptrs
[
i
]
=
(
int8_t
*
)
out_tensor
->
data
;
out_data_ptrs
[
i
]
=
(
int8_t
*
)
out
put
_tensor
->
data
;
}
}
else
if
(
op_param
.
ismxnet
||
op_param
.
isonnx
)
...
...
@@ -438,20 +437,10 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
op_param
.
in_shape_2
[
idx
]
=
input_tensor
->
dims
[
idx
];
}
}
struct
tensor
*
out_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
output_tensors
[
0
]);
// std::vector<int> output_dim = o_tensor->GetShape().GetDim();
out_data_ptrs
[
0
]
=
(
int8_t
*
)
out_tensor
->
data
;
// Set the int8 output quant param
// if(data_type == TENGINE_DT_INT8)
// {
// auto* o_quant = o_tensor->GetQuantParam();
// QuantParam q_param;
// q_param.scale = op_param.out_scale;
// o_quant->resize(0);
// o_quant->push_back(q_param);
// }
if
(
input_tensor
->
dims
[
0
]
==
out_tensor
->
dims
[
0
]
&&
input_tensor
->
dims
[
1
]
==
out_tensor
->
dims
[
1
]
&&
input_tensor
->
dims
[
2
]
==
out_tensor
->
dims
[
2
]
&&
input_tensor
->
dims
[
3
]
==
out_tensor
->
dims
[
3
])
out_data_ptrs
[
0
]
=
(
int8_t
*
)
output_tensor
->
data
;
if
(
input_tensor
->
dims
[
0
]
==
output_tensor
->
dims
[
0
]
&&
input_tensor
->
dims
[
1
]
==
output_tensor
->
dims
[
1
]
&&
input_tensor
->
dims
[
2
]
==
output_tensor
->
dims
[
2
]
&&
input_tensor
->
dims
[
3
]
==
output_tensor
->
dims
[
3
])
{
memcpy
((
void
*
)(
out_data_ptrs
[
0
]),
(
void
*
)
input
,
mem_size
*
input_tensor
->
elem_num
);
sys_free
(
out_data_ptrs
);
...
...
@@ -479,24 +468,47 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
dim_idx
++
;
}
}
struct
tensor
*
out_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
output_tensors
[
0
]);
out_data_ptrs
[
0
]
=
(
int8_t
*
)
out_tensor
->
data
;
// Set the int8 output quant param
// if(data_type == TENGINE_DT_INT8)
// {
// auto* o_quant = o_tensor->GetQuantParam();
// QuantParam q_param;
// q_param.scale = op_param.out_scale;
// o_quant->resize(0);
// o_quant->push_back(q_param);
// }
out_data_ptrs
[
0
]
=
(
int8_t
*
)
output_tensor
->
data
;
}
int
ret
=
-
1
;
if
(
input_tensor
->
data_type
==
TENGINE_DT_FP32
)
ret
=
ref_slice_common
(
input
,
out_data_ptrs
,
sizeof
(
float
),
&
op_param
);
else
if
(
input_tensor
->
data_type
==
TENGINE_DT_UINT8
)
ret
=
ref_slice_common
(
input
,
out_data_ptrs
,
sizeof
(
uint8_t
),
&
op_param
);
else
if
(
input_tensor
->
data_type
==
TENGINE_DT_UINT8
)
// ugly implement, need to refactor !
{
/* dequant to fp32 */
uint8_t
*
input_uint8
=
input_tensor
->
data
;
uint8_t
*
output_uint8
=
output_tensor
->
data
;
float
input_scale
=
input_tensor
->
scale
;
float
output_scale
=
output_tensor
->
scale
;
int32_t
input_zero
=
input_tensor
->
zero_point
;
int32_t
output_zero
=
output_tensor
->
zero_point
;
float
*
input_fp32
=
(
float
*
)
sys_malloc
(
input_tensor
->
elem_num
*
sizeof
(
float
));
float
*
output_fp32
=
(
float
*
)
sys_malloc
(
output_tensor
->
elem_num
*
sizeof
(
float
));
out_data_ptrs
[
0
]
=
(
int8_t
*
)
output_fp32
;
for
(
int
i
=
0
;
i
<
input_tensor
->
elem_num
;
i
++
)
{
input_fp32
[
i
]
=
((
float
)
input_uint8
[
i
]
-
(
float
)
input_zero
)
*
input_scale
;
}
ret
=
ref_slice_common
((
int8_t
*
)
input_fp32
,
out_data_ptrs
,
sizeof
(
float
),
&
op_param
);
/* quant to uint8 */
for
(
int
i
=
0
;
i
<
output_tensor
->
elem_num
;
i
++
)
{
int
udata
=
round
(
output_fp32
[
i
]
/
output_scale
+
output_zero
);
if
(
udata
>
255
)
udata
=
255
;
else
if
(
udata
<
0
)
udata
=
0
;
output_uint8
[
i
]
=
udata
;
}
free
(
input_fp32
);
free
(
output_fp32
);
}
sys_free
(
out_data_ptrs
);
if
(
ret
<
0
)
...
...
source/device/tim-vx/op/timvx_mish.cc
0 → 100644
浏览文件 @
eaa1cb37
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern
"C"
{
#include "operator/op.h"
}
/*
 * Add a TIM-VX Mish operation for `ir_node` to the engine's graph, bound to
 * the VX tensors registered for the node's first input and first output.
 *
 * Always returns true.
 */
bool VXEngine::AddMishNode(struct node* ir_node)
{
    struct graph* ir_graph = ir_node->graph;

    struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);

    auto mish = this->graph->CreateOperation<tim::vx::ops::Mish>();
    mish->BindInputs({this->vx_tensor_map[input_tensor->index]})
        .BindOutputs({this->vx_tensor_map[output_tensor->index]});

    return true;
}
source/device/tim-vx/op/timvx_slice.cc
浏览文件 @
eaa1cb37
...
...
@@ -33,7 +33,6 @@ extern "C"
bool
VXEngine
::
AddSliceNode
(
struct
node
*
ir_node
)
{
struct
graph
*
ir_graph
=
ir_node
->
graph
;
struct
tensor
*
input_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
input_tensors
[
0
]);
struct
tensor
*
output_tensor
=
get_ir_graph_tensor
(
ir_graph
,
ir_node
->
output_tensors
[
0
]);
...
...
@@ -42,7 +41,7 @@ bool VXEngine::AddSliceNode(struct node* ir_node)
uint32_t
axis
=
output_tensor
->
dim_num
-
1
-
param
->
axis
;
std
::
vector
<
int32_t
>
start
;
for
(
int
i
=
output_tensor
->
dim_num
-
1
;
i
>=
0
;
i
--
)
for
(
int
i
=
0
;
i
<
output_tensor
->
dim_num
;
i
++
)
{
if
(
axis
==
i
)
start
.
push_back
(
param
->
begin
);
...
...
@@ -51,12 +50,12 @@ bool VXEngine::AddSliceNode(struct node* ir_node)
}
std
::
vector
<
int32_t
>
length
;
for
(
int
i
=
output_tensor
->
dim_num
-
1
;
i
>=
0
;
i
--
)
for
(
int
i
=
0
;
i
<
output_tensor
->
dim_num
;
i
++
)
{
if
(
axis
==
i
)
length
.
push_back
(
param
->
end
-
param
->
begin
);
else
length
.
push_back
(
output_tensor
->
dims
[
i
]);
length
.
push_back
(
output_tensor
->
dims
[
output_tensor
->
dim_num
-
1
-
i
]);
}
auto
slice
=
this
->
graph
->
CreateOperation
<
tim
::
vx
::
ops
::
Slice
>
(
output_tensor
->
dim_num
,
start
,
length
);
...
...
source/device/tim-vx/timvx_executor.cc
浏览文件 @
eaa1cb37
...
...
@@ -224,6 +224,9 @@ int VXEngine::Build(struct subgraph* subgraph)
case
OP_INTERP
:
this
->
AddInterpNode
(
ir_node
);
break
;
case
OP_MISH
:
this
->
AddMishNode
(
ir_node
);
break
;
case
OP_PERMUTE
:
this
->
AddPermuteNode
(
ir_node
);
break
;
...
...
source/device/tim-vx/timvx_executor.hpp
浏览文件 @
eaa1cb37
...
...
@@ -103,6 +103,7 @@ private:
bool
AddGatherNode
(
struct
node
*
node
);
bool
AddHardSwishNode
(
struct
node
*
node
);
bool
AddInterpNode
(
struct
node
*
ir_node
);
bool
AddMishNode
(
struct
node
*
ir_node
);
bool
AddPermuteNode
(
struct
node
*
ir_node
);
bool
AddPoolingNode
(
struct
node
*
ir_node
);
bool
AddPReluNode
(
struct
node
*
ir_node
);
...
...
source/device/tim-vx/timvx_limit.hpp
浏览文件 @
eaa1cb37
...
...
@@ -121,7 +121,7 @@ const int timvx_supported_ops[] = {
// OP_UNSQUEEZE,
OP_UPSAMPLE
,
// OP_ZEROSLIKE,
//
OP_MISH,
OP_MISH
,
// OP_LOGSOFTMAX,
// OP_RELU1,
// OP_L2NORMALIZATION,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录