Commit f9d908b8
Authored Aug 29, 2020 by zhaozhenlong

optimize resize using cache line

Parent 469b132c
Showing 5 changed files with 178 additions and 3 deletions (+178 -3)
mindspore/lite/nnacl/fp32/resize.c  (+123 -0)
mindspore/lite/nnacl/fp32/resize.h  (+6 -0)
mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc  (+17 -3)
mindspore/lite/src/runtime/kernel/arm/fp32/resize.h  (+3 -0)
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc  (+29 -0)
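What the change does: bilinear resize first interpolates two source rows horizontally and then blends them vertically into one output row. Consecutive output rows usually map to overlapping source rows, so the new ResizeBilinear2 keeps the two most recently interpolated rows in the caller-provided line0/line1 buffers and recomputes a row only on a cache miss. Below is a minimal single-batch, single-thread sketch of that scheme; the wrapper function name and the include path are illustrative assumptions, and the actual implementation is the resize.c hunk that follows.

/* Minimal sketch of the two-slot row cache behind ResizeBilinear2.
 * Assumptions: single batch, single thread; the include path below is a guess. */
#include <stdbool.h>
#include "nnacl/fp32/resize.h" /* assumed path for the InterpRow/InterpCol declarations */

void ResizeBilinearRowCacheSketch(const float *in, float *out, int in_w, int in_c, int new_h, int new_w,
                                  int *y_bottoms, int *y_tops, float *y_bottom_weights, int *x_lefts, int *x_rights,
                                  float *x_left_weights, float *line0, float *line1) {
  float *cache_ptr[2] = {line0, line1}; /* two reusable buffers, new_w * in_c floats each */
  int cache_row[2] = {-1, -1};          /* source row currently held by each buffer */
  for (int h = 0; h < new_h; h++) {
    int want[2] = {y_bottoms[h], y_tops[h]};
    float *row_ptr[2];
    bool used[2] = {false, false};
    for (int j = 0; j < 2; j++) {
      int slot = -1;
      for (int k = 0; k < 2; k++) { /* cache hit: this source row is already interpolated */
        if (cache_row[k] == want[j]) {
          slot = k;
          used[k] = true;
          break;
        }
      }
      if (slot < 0) { /* cache miss: interpolate the row into a slot not needed this iteration */
        for (int k = 0; k < 2; k++) {
          if (!used[k]) {
            slot = k;
            used[k] = true;
            cache_row[k] = want[j];
            InterpRow(in + want[j] * in_w * in_c, cache_ptr[k], new_w, x_left_weights, x_lefts, x_rights, in_c);
            break;
          }
        }
      }
      row_ptr[j] = cache_ptr[slot];
    }
    /* blend the two cached rows vertically into one output row */
    InterpCol(row_ptr[0], row_ptr[1], out + h * new_w * in_c, new_w, y_bottom_weights[h], in_c);
  }
}

When upscaling, many consecutive output rows share the same bottom/top source rows, so most iterations hit the cache and skip the horizontal interpolation entirely.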
mindspore/lite/nnacl/fp32/resize.c
@@ -154,6 +154,129 @@ int ResizeBilinear(const float *input_data, float *output_data, const int *input
  return NNACL_OK;
}

int InterpRow(const float *src_line, float *linear_output, int new_width, float *x_left_weights, int *x_lefts,
              int *x_rights, int in_c) {
  int w;
  for (w = 0; w < new_width; w++) {
    int c = 0;
#ifdef ENABLE_NEON
    float32x4_t left_w = vdupq_n_f32(x_left_weights[w]);
    float32x4_t right_w = vdupq_n_f32(1.0f - x_left_weights[w]);
    for (; c <= in_c - 4; c += 4) {
      float32x4_t left = vld1q_f32(src_line + x_lefts[w] * in_c + c);
      float32x4_t right = vld1q_f32(src_line + x_rights[w] * in_c + c);
      float32x4_t interp_value = left * left_w + right * right_w;
      vst1q_f32(linear_output + w * in_c + c, interp_value);
    }
#endif
    int left_w_offset = x_lefts[w] * in_c;
    int right_w_offset = x_rights[w] * in_c;
    for (; c < in_c; c++) {
      float left = src_line[left_w_offset + c];
      float right = src_line[right_w_offset + c];
      linear_output[w * in_c + c] = left * x_left_weights[w] + right * (1.0f - x_left_weights[w]);
    }
  }
  return 0;
}

int InterpCol(const float *bottom_line, const float *top_line, float *output, int new_width, float y_bottom_weight,
              int in_c) {
  int w;
  for (w = 0; w < new_width; w++) {
    int c = 0;
#ifdef ENABLE_NEON
    float32x4_t bottom_w = vdupq_n_f32(y_bottom_weight);
    float32x4_t top_w = vdupq_n_f32(1.0f - y_bottom_weight);
    for (; c <= in_c - 4; c += 4) {
      float32x4_t bottom = vld1q_f32(bottom_line + w * in_c + c);
      float32x4_t top = vld1q_f32(top_line + w * in_c + c);
      float32x4_t interp_value = bottom * bottom_w + top * top_w;
      vst1q_f32(output + w * in_c + c, interp_value);
    }
#endif
    for (; c < in_c; c++) {
      float bottom = bottom_line[w * in_c + c];
      float top = top_line[w * in_c + c];
      output[w * in_c + c] = bottom * y_bottom_weight + top * (1.0f - y_bottom_weight);
    }
  }
  return 0;
}

int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                    int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights,
                    float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end) {
  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL ||
      y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
    return NNACL_NULL_PTR;
  }
  int in_h = input_shape[1];
  int in_w = input_shape[2];
  int in_c = input_shape[3];

  int new_height = output_shape[1];
  int new_width = output_shape[2];

  int n_h;
  int n_h_stride = new_width * in_c;
  bool cache_line_used[2] = {false, false};
  int cache_line_num[2] = {-1, -1};
  float *const cache_line_ptr[2] = {line0, line1};
  float *current_line_ptr[2] = {line0, line1};
  int current_line_num[2] = {-1, -1};

  for (n_h = n_h_begin; n_h < n_h_end; n_h++) {
    int n, h;
    n = n_h / new_height;
    h = n_h % new_height;

    current_line_num[0] = n * in_h + y_bottoms[h];
    current_line_num[1] = n * in_h + y_tops[h];

    int i;
    for (i = 0; i < 2; i++) {
      cache_line_used[i] = false;
    }
    // search if we cached
    int j, k;
    for (j = 0; j < 2; j++) {
      bool find = false;
      for (k = 0; k < 2; k++) {
        if (current_line_num[j] == cache_line_num[k]) {
          cache_line_used[k] = true;
          current_line_ptr[j] = cache_line_ptr[k];
          find = true;
          break;
        }
      }
      if (!find) {
        const float *line = input_data + current_line_num[j] * in_w * in_c;
        for (k = 0; k < 2; k++) {
          if (!cache_line_used[k]) {
            cache_line_num[k] = current_line_num[j];
            cache_line_used[k] = true;
            current_line_ptr[j] = cache_line_ptr[k];
            InterpRow(line, current_line_ptr[j], new_width, x_left_weights, x_lefts, x_rights, in_c);
            break;
          }
        }
      }
    }
    // do col interp
    InterpCol(current_line_ptr[0], current_line_ptr[1], output_data + n_h * n_h_stride, new_width,
              y_bottom_weights[h], in_c);
  }
  return NNACL_OK;
}

int ResizeNearestNeighbor(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                          int tid, int thread_num) {
  int batch, y, x, c;
...
mindspore/lite/nnacl/fp32/resize.h
@@ -28,9 +28,15 @@ extern "C" {
int PrepareResizeBilinear(const int *input_shape, const int *output_shape, bool align_corners, int *y_bottoms,
                          int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights, float *x_left_weights);

int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                   int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights,
                   float *x_left_weights, int n_h_begin, int n_h_end);

int ResizeBilinear2(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                    int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights,
                    float *x_left_weights, float *line0, float *line1, int n_h_begin, int n_h_end);

int ResizeNearestNeighbor(const float *input_data, float *output_data, const int *input_shape,
                          const int *output_shape, int tid, int thread_num);

#ifdef __cplusplus
...
mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc
@@ -61,6 +61,7 @@ int ResizeCPUKernel::ReSize() {
}

int ResizeCPUKernel::MallocTmpBuffer() {
  int c = in_tensors_.at(0)->Channel();
  int h = new_height_;
  int w = new_width_;
  y_bottoms_ = reinterpret_cast<int *>(malloc(sizeof(int) * h));
...
@@ -94,6 +95,12 @@ int ResizeCPUKernel::MallocTmpBuffer() {
    MS_LOG(ERROR) << "malloc data failed";
    return RET_NULL_PTR;
  }
  line_buffer_ = reinterpret_cast<float *>(malloc(sizeof(float) * w * c * 2 * context_->thread_num_));
  if (line_buffer_ == nullptr) {
    MS_LOG(ERROR) << "malloc data failed";
    return RET_NULL_PTR;
  }
  return RET_OK;
}

void ResizeCPUKernel::FreeTmpBuffer() {
...
@@ -122,6 +129,10 @@ void ResizeCPUKernel::FreeTmpBuffer() {
    free(x_left_weights_);
    x_left_weights_ = nullptr;
  }
  if (line_buffer_ != nullptr) {
    free(line_buffer_);
    line_buffer_ = nullptr;
  }
}

int ResizeImpl(void *cdata, int task_id) {
...
@@ -158,9 +169,12 @@ int ResizeCPUKernel::RunImpl(int task_id) {
      int unit = UP_DIV(n * h, context_->thread_num_);
      n_h_begin = unit * task_id;
      n_h_end = std::min(n_h_begin + unit, n * h);
      ret = ResizeBilinear(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(), y_bottoms_,
                           y_tops_, x_lefts_, x_rights_, y_bottom_weights_, x_left_weights_, n_h_begin, n_h_end);
      int c = in_tensors_.at(0)->shape()[3];
      line0_ = line_buffer_ + new_width_ * c * 2 * task_id;
      line1_ = line0_ + new_width_ * c;
      ret = ResizeBilinear2(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(), y_bottoms_,
                            y_tops_, x_lefts_, x_rights_, y_bottom_weights_, x_left_weights_, line0_, line1_,
                            n_h_begin, n_h_end);
      break;
    }
...
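For reference, line_buffer_ allocated in MallocTmpBuffer above is one contiguous block of sizeof(float) * new_width_ * channel * 2 * thread_num_, and RunImpl slices a private line0_/line1_ pair out of it for each task, so threads never share cache rows. A small worked example of that layout, using illustrative sizes that are not taken from the commit:

#include <stdlib.h>

/* Worked example of the per-task line buffer layout used by RunImpl
 * (new_width = 4, channel = 3, thread_num = 2 are illustrative values).
 * Each task owns 2 * new_width * channel floats inside the shared buffer:
 *   task 0: line0 at offset 0,  line1 at offset 12
 *   task 1: line0 at offset 24, line1 at offset 36   (48 floats in total) */
void LineBufferLayoutExample(void) {
  const int new_width = 4, channel = 3, thread_num = 2;
  float *line_buffer = (float *)malloc(sizeof(float) * new_width * channel * 2 * thread_num);
  for (int task_id = 0; task_id < thread_num; task_id++) {
    float *line0 = line_buffer + new_width * channel * 2 * task_id; /* mirrors line0_ above */
    float *line1 = line0 + new_width * channel;                     /* mirrors line1_ above */
    (void)line0;
    (void)line1;
  }
  free(line_buffer);
}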
mindspore/lite/src/runtime/kernel/arm/fp32/resize.h
@@ -47,6 +47,9 @@ class ResizeCPUKernel : public ResizeBaseCPUKernel {
  int *x_rights_ = nullptr;
  float *y_bottom_weights_ = nullptr;
  float *x_left_weights_ = nullptr;
  float *line_buffer_ = nullptr;
  float *line0_ = nullptr;
  float *line1_ = nullptr;
};
}  // namespace mindspore::kernel
...
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc
@@ -19,6 +19,8 @@
#include "common/common_test.h"
#include "nnacl/resize_parameter.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/schema/ops_generated.h"

using mindspore::schema::Format_NHWC;

namespace mindspore {
...
@@ -52,6 +54,7 @@ void TestResizeBilinearFp32::Prepare(const std::vector<int> &input_shape, const
                                     float *input_data, float *output_data, const bool align_corners,
                                     const int thread_num) {
  in_tensor_.set_data_type(kNumberTypeFloat32);
  in_tensor_.SetFormat(Format_NHWC);
  in_tensor_.set_shape(input_shape);
  out_tensor_.set_data_type(kNumberTypeFloat32);
  out_tensor_.set_shape(output_shape);
...
@@ -377,4 +380,30 @@ TEST_F(TestResizeBilinearFp32, ResizeBilinearTest15) {
  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}

// 5*5 -> 2*2
TEST_F(TestResizeBilinearFp32, ResizeBilinearTest16) {
  float input_data[] = {0.0,   1.0,   2.0,   3.0,   4.0,   5.0,   6.0,   7.0,   8.0,   9.0,   10.0,  11.0,  12.0,
                        13.0,  14.0,  15.0,  16.0,  17.0,  18.0,  19.0,  20.0,  21.0,  22.0,  23.0,  24.0,  25.0,
                        26.0,  27.0,  28.0,  29.0,  30.0,  31.0,  32.0,  33.0,  34.0,  35.0,  36.0,  37.0,  38.0,
                        39.0,  40.0,  41.0,  42.0,  43.0,  44.0,  45.0,  46.0,  47.0,  48.0,  49.0,  50.0,  51.0,
                        52.0,  53.0,  54.0,  55.0,  56.0,  57.0,  58.0,  59.0,  60.0,  61.0,  62.0,  63.0,  64.0,
                        65.0,  66.0,  67.0,  68.0,  69.0,  70.0,  71.0,  72.0,  73.0,  74.0,  75.0,  76.0,  77.0,
                        78.0,  79.0,  80.0,  81.0,  82.0,  83.0,  84.0,  85.0,  86.0,  87.0,  88.0,  89.0,  90.0,
                        91.0,  92.0,  93.0,  94.0,  95.0,  96.0,  97.0,  98.0,  99.0,  100.0, 101.0, 102.0, 103.0,
                        104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0,
                        117.0, 118.0, 119.0, 120.0, 121.0, 122.0, 123.0, 124.0};
  float output_data[20] = {0};
  std::vector<int> input_shape = {1, 5, 5, 5};
  std::vector<int> output_shape = {1, 2, 2, 5};
  std::vector<float> expect = {0.0,  1.0,  2.0,  3.0,  4.0,  12.5, 13.5, 14.5, 15.5, 16.5,
                               62.5, 63.5, 64.5, 65.5, 66.5, 75.0, 76.0, 77.0, 78.0, 79.0};
  bool align_corners = false;
  auto output_size = 20;
  Prepare(input_shape, output_shape, input_data, output_data, align_corners, 2);
  auto ret = kernel_->Run();
  EXPECT_EQ(0, ret);
  CompareOutputData(output_data, expect.data(), output_size, err_tol);
}
}  // namespace mindspore