magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit f6783881
Authored on Aug 18, 2020 by Pengyongrong

add new ops named slice
add ops supported fp16 add dimension 0-3 for concat

Parent 1d0e0ae2
Showing 9 changed files with 249 additions and 285 deletions (+249, -285)
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl                +87   -20
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc          +3    -3
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc            +26   -53
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h              +0    -2
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc              +3    -3
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc          +4    -4
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc  +24   -33
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc     +73  -135
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc      +29   -32
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl
@@ -2,39 +2,106 @@
 __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

-__kernel void Concat(__read_only image2d_t input0, __read_only image2d_t input1, __write_only image2d_t output,
-                     int2 input_channels, int4 output_shape) {
-  int X = get_global_id(0);  // H
+__kernel void Concat(__read_only image2d_t input0, __read_only image2d_t input1, __write_only image2d_t output,
+                     int4 input_shape0, int4 input_shape1, int4 output_shape, const int axis) {
+  int X = get_global_id(0);  // N*H
   int Y = get_global_id(1);  // W
   int Z = get_global_id(2);  // c/4
-  if (X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
+  if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
     return;
   }
-  if (Z < input_channels.x) {
-    FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X)));
-    WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+  if (axis == 0) {
+    if (X < input_shape0.x * input_shape0.y) {
+      FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    } else {
+      FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.x * input_shape0.y)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    }
+  } else if (axis == 1) {
+    if (X < input_shape0.y) {
+      FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    } else {
+      FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    }
+  } else if (axis == 2) {
+    if (Y < input_shape0.z) {
+      FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    } else {
+      FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    }
   } else {
-    FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X)));
-    WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    if (Z < input_shape0.w) {
+      FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    } else {
+      FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result);
+    }
   }
 }

 __kernel void Concat3input(__read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2,
-                           __write_only image2d_t output, int3 input_channels, int4 output_shape) {
-  int X = get_global_id(0);  // H
+                           __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2,
+                           int4 output_shape, const int axis) {
+  int X = get_global_id(0);  // N*H
   int Y = get_global_id(1);  // W
   int Z = get_global_id(2);  // c/4
-  if (X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
+  if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
     return;
   }
-  if (Z < input_channels.x) {
-    FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X)));
-    WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0);
-  } else if (Z < (input_channels.x + input_channels.y)) {
-    FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X)));
-    WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1);
+  if (axis == 0) {
+    if (X < input_shape0.x * input_shape0.y) {
+      FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0);
+    } else if (X < (input_shape0.x * input_shape0.y + input_shape1.x * input_shape1.y)) {
+      FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.x * input_shape0.y)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1);
+    } else {
+      FLT4 result2 = READ_IMAGE(input2, smp_none,
+                                (int2)((Y)*input_shape2.w + Z, (X - input_shape0.x * input_shape0.y - input_shape1.x * input_shape1.y)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2);
+    }
+  } else if (axis == 1) {
+    if (X < input_shape0.y) {
+      FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0);
+    } else if (X < (input_shape0.y + input_shape1.y)) {
+      FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1);
+    } else {
+      FLT4 result2 = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z, (X - input_shape0.y - input_shape1.y)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2);
+    }
+  } else if (axis == 2) {
+    if (Y < input_shape0.z) {
+      FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0);
+    } else if (Y < (input_shape0.z + input_shape0.z)) {
+      FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1);
+    } else {
+      FLT4 result2 = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z) * input_shape2.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2);
+    }
   } else {
-    FLT4 result2 = READ_IMAGE(input2, smp_none, (int2)((Y)*input_channels.z + Z - input_channels.x - input_channels.y, (X)));
-    WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2);
+    if (Z < input_shape0.w) {
+      FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0);
+    } else if (Z < (input_shape0.w + input_shape0.w)) {
+      FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1);
+    } else {
+      FLT4 result2 = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z - input_shape0.w - input_shape1.w, (X)));
+      WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2);
+    }
   }
 }
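For orientation, the rewritten kernels index the packed NHWC4 images with X as the N*H row, Y as the W column and Z as the channel slice, and they pick the source input by comparing the work-item id against the first input's extent on the concatenation axis. Below is a minimal host-side sketch of the same selection arithmetic for the two-input, axis-3 branch; the struct and helper name are illustrative only and are not part of the patch.

#include <cassert>

// Hypothetical helper mirroring the kernel's final (axis == 3) branch:
// an output channel slice Z is read from input0 while Z < input_shape0.w,
// otherwise from input1 at slice Z - input_shape0.w.
struct Source {
  int input;  // 0 or 1
  int slice;  // channel-slice index inside that input
};

Source MapAxis3(int Z, int slices0) {
  if (Z < slices0) {
    return {0, Z};
  }
  return {1, Z - slices0};
}

int main() {
  // Example: input0 contributes 24 channel slices (96 channels / 4); input1 follows it.
  assert(MapAxis3(5, 24).input == 0);
  assert(MapAxis3(30, 24).input == 1 && MapAxis3(30, 24).slice == 6);
  return 0;
}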
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
@@ -100,7 +100,7 @@ void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t
   local->push_back(z);
 }

 int BatchNormOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_);
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   auto input0_shape = in_tensors_[0]->shape();
@@ -136,12 +136,12 @@ kernel::LiteKernel *OpenCLBatchnormKernelCreator(const std::vector<lite::tensor:
                                                  const mindspore::lite::PrimitiveC *primitive) {
   auto *kernel = new (std::nothrow) BatchNormOpenCLKernel(opParameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new BatchnormOpenCLKernel failed";
     return nullptr;
   }
   auto ret = kernel->Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed, name: Convolution";
+    MS_LOG(ERROR) << "Init kernel failed, name: Batchnorm";
     delete kernel;
     return nullptr;
   }
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
@@ -33,10 +33,10 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
   size_t im_dst_x, im_dst_y;
   if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
     im_dst_x = out_tensors_[0]->Width() * CO4;
-    im_dst_y = out_tensors_[0]->Height();
+    im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch();
   } else {
     im_dst_y = out_tensors_[0]->Height() * CO4;
-    im_dst_x = out_tensors_[0]->Width();
+    im_dst_x = out_tensors_[0]->Width() * out_tensors_[0]->Batch();
   }
   size_t img_dtype = CL_FLOAT;
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
@@ -51,20 +51,17 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
 }

 int ConcatOpenCLKernel::Init() {
   if (in_tensors_[0]->shape().size() != 4) {
-    MS_LOG(ERROR) << "only support dim=4";
+    MS_LOG(ERROR) << "only support dim = 4";
     return RET_ERROR;
   }
   auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_);
   MS_LOG(DEBUG) << "concat at axis=: " << param->axis_;
-  if (param->axis_ != 0 && param->axis_ != 3) {
-    MS_LOG(ERROR) << "only support axis=0 or axis=3";
+  if (param->axis_ < 0 || param->axis_ > 3) {
+    MS_LOG(ERROR) << "only support axis >= 0 and axis <= 3";
     return RET_ERROR;
   }
-  if (param->axis_ == 0) {
-    return RET_OK;
-  }
   if (in_tensors_.size() == 2) {
     std::set<std::string> build_options;
     std::string source = concat_source;
@@ -94,33 +91,6 @@ int ConcatOpenCLKernel::Init() {

 int ConcatOpenCLKernel::ReSize() { return RET_OK; }

-int ConcatOpenCLKernel::Run_axis0() {
-  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
-  auto allocator_ = ocl_runtime->GetAllocator();
-  cl::CommandQueue *command_queue = ocl_runtime->GetDefaultCommandQueue();
-  for (auto &tensor : in_tensors_) {
-    auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
-    ocl_runtime->MapBuffer(*buffer, CL_MAP_READ, tensor->Size(), command_queue, true);
-  }
-  for (auto &tensor : out_tensors_) {
-    auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
-    ocl_runtime->MapBuffer(*buffer, CL_MAP_WRITE, tensor->Size(), command_queue, true);
-  }
-  memcpy(out_tensors_[0]->Data(), in_tensors_[0]->Data(), in_tensors_[0]->Size());
-  memcpy(reinterpret_cast<char *>(out_tensors_[0]->Data()) + in_tensors_[0]->Size(), in_tensors_[1]->Data(),
-         in_tensors_[1]->Size());
-  for (auto tensors : {&in_tensors_, &out_tensors_}) {
-    for (auto &tensor : *tensors) {
-      auto buffer = static_cast<cl::Buffer *>(allocator_->GetBuffer(tensor->Data()));
-      ocl_runtime->UnmapBuffer(*buffer, tensor->Data());
-    }
-  }
-  return RET_OK;
-}
-
 int ConcatGetBiggestDividerWithPriority(int number, int max_divider) {
   if (number % 8 == 0 && max_divider >= 8) {
     return number / 8;
@@ -154,21 +124,19 @@ void ConcatGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *
   local->push_back(z);
 }

 int ConcatOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_);
-  if (param->axis_ == 0) {
-    return Run_axis0();
-  }
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
-  auto input0_shape = in_tensors_[0]->shape();
-  auto input1_shape = in_tensors_[1]->shape();
+  auto input1_shape = in_tensors_[0]->shape();
+  auto input2_shape = in_tensors_[1]->shape();
   auto output_shape = out_tensors_[0]->shape();
-  cl_int2 input0_shape2_ = {UP_DIV(input0_shape[3], C4NUM), UP_DIV(input1_shape[3], C4NUM)};  // change
+  cl_int4 input_shape1_ = {input1_shape[0], input1_shape[1], input1_shape[2], UP_DIV(input1_shape[3], C4NUM)};
+  cl_int4 input_shape2_ = {input2_shape[0], input2_shape[1], input2_shape[2], UP_DIV(input2_shape[3], C4NUM)};
   cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)};
-  uint32_t OH = output_shape[1];  // N*H
+  uint32_t OH = output_shape[0] * output_shape[1];  // N*H
   uint32_t OW = output_shape[2];
   uint32_t OC = UP_DIV(output_shape[3], C4NUM);
@@ -182,23 +150,28 @@ int ConcatOpenCLKernel::Run() {
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->Data());
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->Data());
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->Data());
-    ocl_runtime->SetKernelArg(kernel_, arg_cn++, input0_shape2_);
+    ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
+    ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
+    ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
   } else if (in_tensors_.size() == 3) {
-    auto input2_shape = in_tensors_[2]->shape();
-    cl_int3 input0_shape3_ = {UP_DIV(input0_shape[3], C4NUM), UP_DIV(input1_shape[3], C4NUM),
-                              UP_DIV(input2_shape[3], C4NUM)};
+    auto input3_shape = in_tensors_[2]->shape();
+    cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)};
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->Data());
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->Data());
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->Data());
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->Data());
-    ocl_runtime->SetKernelArg(kernel_, arg_cn++, input0_shape3_);
+    ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
+    ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
+    ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_);
     ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
+    ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
   } else if (in_tensors_.size() < 2) {
-    MS_LOG(ERROR) << " inputs must been >=2";
+    MS_LOG(ERROR) << " input sizes must >= 2";
     return RET_ERROR;
   } else {
-    MS_LOG(ERROR) << "only support inputs<=3";
+    MS_LOG(ERROR) << "only support inputs <= 3";
     return RET_ERROR;
   }
   ocl_runtime->RunKernel(kernel_, global, local, nullptr);
@@ -213,12 +186,12 @@ kernel::LiteKernel *OpenCLConcatKernelCreator(const std::vector<lite::tensor::Te
                                               const mindspore::lite::PrimitiveC *primitive) {
   auto *kernel = new (std::nothrow) ConcatOpenCLKernel(opParameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ConcatOpenCLKernel failed";
     return nullptr;
   }
   auto ret = kernel->Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed, name: Convolution";
+    MS_LOG(ERROR) << "Init kernel failed, name: Concat";
     delete kernel;
     return nullptr;
   }
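On the host side the change folds the batch into the first global work dimension (OH becomes N*H) and passes channel extents as 4-element slices. A small standalone sketch of that arithmetic follows; it assumes UP_DIV and C4NUM keep their usual MindSpore Lite meaning of ceil-division and a packing factor of 4.

#include <cstdio>

// Assumed to match the MindSpore Lite macros: C4NUM is the NHWC4 packing
// factor and UP_DIV(x, y) is ceil(x / y).
constexpr int C4NUM = 4;
constexpr int UP_DIV(int x, int y) { return (x + y - 1) / y; }

int main() {
  // Output shape {N, H, W, C} of the fp32 concat test after its axis-1 concat.
  int output_shape[4] = {1, 48, 256, 80};
  unsigned OH = output_shape[0] * output_shape[1];  // N*H = 48
  unsigned OW = output_shape[2];                    // W   = 256
  unsigned OC = UP_DIV(output_shape[3], C4NUM);     // C/4 = 20
  printf("global work size = {%u, %u, %u}\n", OH, OW, OC);
  return 0;
}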
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h
@@ -37,8 +37,6 @@ class ConcatOpenCLKernel : public OpenCLKernel {
   int ReSize() override;

-  int Run_axis0();
-
   int Run() override;

   int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
@@ -97,7 +97,7 @@ void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *l
   local->push_back(z);
 }

 int SliceOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   auto param = reinterpret_cast<SliceParameter *>(this->op_parameter_);
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   auto input_shape = in_tensors_[0]->shape();
@@ -131,12 +131,12 @@ kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector<lite::tensor::Ten
                                              const mindspore::lite::PrimitiveC *primitive) {
   auto *kernel = new (std::nothrow) SliceOpenCLKernel(opParameter, inputs, outputs);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SliceOpenCLKernel failed";
     return nullptr;
   }
   auto ret = kernel->Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed, name: Convolution";
+    MS_LOG(ERROR) << "Init kernel failed, name: Slice";
     delete kernel;
     return nullptr;
   }
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
@@ -94,7 +94,7 @@ int ToFormatOpenCLKernel::InitNHWCShape() {
 int ToFormatOpenCLKernel::ReSize() { return RET_OK; }

 int ToFormatOpenCLKernel::GetGlobalSize(size_t idx, std::vector<size_t> *global_size) {
-  std::vector<size_t> vec = {nhwc_shape_[1], nhwc_shape_[2], UP_DIV(nhwc_shape_[3], C4NUM)};
+  std::vector<size_t> vec = {nhwc_shape_[0] * nhwc_shape_[1], nhwc_shape_[2], UP_DIV(nhwc_shape_[3], C4NUM)};
   *global_size = std::move(vec);
   return RET_OK;
 }
@@ -107,13 +107,13 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
   size_t im_dst_x, im_dst_y;
   std::vector<int> shapex = out_tensors_[0]->shape();
   if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
-    int c = shapex[1];
-    int h = shapex[2];
+    int c = shapex[1] * shapex[2];
+    int h = shapex[0];
     int w = shapex[3];
     im_dst_y = h * UP_DIV(c, C4NUM);
     im_dst_x = w;
   } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
-    int h = shapex[1];
+    int h = shapex[0] * shapex[1];
     int w = shapex[2];
     int c = shapex[3];
     im_dst_x = w * UP_DIV(c, C4NUM);
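The same batch folding shows up in the 2-D image that backs an NHWC4 tensor: its height becomes N*H while its width stays W times the number of channel slices. A short worked example under the same UP_DIV assumption as above, using the output shape of the fp16 concat test:

#include <cstdio>

constexpr int C4NUM = 4;
constexpr int UP_DIV(int x, int y) { return (x + y - 1) / y; }

int main() {
  // NHWC shape {2, 19, 19, 96}, as used by the fp16 concat test below.
  int shapex[4] = {2, 19, 19, 96};
  size_t im_dst_y = shapex[0] * shapex[1];                 // N*H     = 38
  size_t im_dst_x = shapex[2] * UP_DIV(shapex[3], C4NUM);  // W * C/4 = 19 * 24 = 456
  printf("image size = %zu x %zu\n", im_dst_x, im_dst_y);
  return 0;
}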
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
@@ -32,13 +32,6 @@ class TestBatchnormOpenCLfp16 : public mindspore::CommonTest {
   TestBatchnormOpenCLfp16() {}
 };

-template <typename T>
-void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bound) {
-  for (size_t i = 0; i < size; i++) {
-    T abs = fabs(output_data[i] - correct_data[i]);
-    ASSERT_LE(abs, err_bound);
-  }
-}
-
 TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   MS_LOG(INFO) << "begin test";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
@@ -46,7 +39,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   ocl_runtime->Init();
   auto allocator = ocl_runtime->GetAllocator();
   MS_LOG(INFO) << "Read tensors from .bin";
   std::vector<int> input_shape = {1, 256, 256, 48};
   std::vector<int> output_shape = {1, 256, 256, 48};
   auto data_type = kNumberTypeFloat32;
@@ -59,7 +52,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   std::string var_path = "./test_data/batchnorm_varfp16.bin";
   std::string offset_path = "./test_data/batchnorm_offsetfp16.bin";
   std::string scale_path = "./test_data/batchnorm_scalefp16.bin";
-  std::string output_path = "./test_data/batchnorm_out_datafp16.bin";
+  std::string output_path = "./test_data/batchnorm_correctdatafp16.bin";
   auto input_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
   auto correct_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));
   size_t mean_size, var_size, scale_size, offset_size;
@@ -68,7 +61,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   auto scale_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(scale_path.c_str(), &scale_size));
   auto offset_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(offset_path.c_str(), &offset_size));
   MS_LOG(INFO) << "construct tensors";
   lite::tensor::Tensor *tensor_data =
     new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
   lite::tensor::Tensor *tensor_mean =
@@ -81,13 +74,13 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
     new (std::nothrow) lite::tensor::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type);
   if (tensor_data == nullptr || tensor_mean == nullptr || tensor_var == nullptr || tensor_scale == nullptr ||
       tensor_offset == nullptr) {
     MS_LOG(INFO) << "init tensor failed";
     return;
   }
   auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(INFO) << "init tensor failed";
     delete tensor_data;
     delete tensor_mean;
     delete tensor_var;
@@ -98,10 +91,10 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   std::vector<lite::tensor::Tensor *> inputs = {tensor_data, tensor_scale, tensor_offset, tensor_mean, tensor_var};
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
   MS_LOG(INFO) << "initialize tensors";
   auto param = new (std::nothrow) BatchNormParameter();
   if (param == nullptr) {
     MS_LOG(INFO) << "new BatchNormParameter failed";
     for (auto tensor : outputs) {
       delete tensor;
     }
@@ -111,7 +104,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   auto *batchnorm_kernel =
     new (std::nothrow) kernel::BatchNormOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
   if (batchnorm_kernel == nullptr) {
     MS_LOG(INFO) << "new kernel::BatchNorm_kernel failed";
     for (auto tensor : outputs) {
       delete tensor;
     }
@@ -125,11 +118,11 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
     input_tensor->MallocData(allocator);
   }
   MS_LOG(INFO) << "initialize sub_graph";
   std::vector<kernel::LiteKernel *> kernels{batchnorm_kernel};
   auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
   if (sub_graph == nullptr) {
     MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed";
     for (auto tensor : outputs) {
       delete tensor;
     }
@@ -138,7 +131,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
     return;
   }
   sub_graph->Init();
   MS_LOG(INFO) << "init tensors";
   memcpy(inputs[0]->Data(), input_data, input_size);
   memcpy(inputs[1]->Data(), scale_data, scale_size);
   memcpy(inputs[2]->Data(), offset_data, offset_size);
@@ -148,7 +141,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   sub_graph->Run();
   auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->Data());
-  CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001);
+  CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01);
   for (auto tensor : inputs) {
     delete tensor;
   }
@@ -158,15 +151,14 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
   delete param;
   delete batchnorm_kernel;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
   MS_LOG(INFO) << "begin test";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
   auto allocator = ocl_runtime->GetAllocator();
   MS_LOG(INFO) << "Read tensors from .bin";
   std::vector<int> input_shape = {1, 256, 256, 47};
   std::vector<int> output_shape = {1, 256, 256, 47};
   auto data_type = kNumberTypeFloat32;
@@ -188,7 +180,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
   auto scale_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(scale_path.c_str(), &scale_size));
   auto offset_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(offset_path.c_str(), &offset_size));
   MS_LOG(INFO) << "construct tensors";
   lite::tensor::Tensor *tensor_data =
     new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
   lite::tensor::Tensor *tensor_mean =
@@ -201,13 +193,13 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
     new (std::nothrow) lite::tensor::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type);
   if (tensor_data == nullptr || tensor_mean == nullptr || tensor_var == nullptr || tensor_scale == nullptr ||
       tensor_offset == nullptr) {
     MS_LOG(INFO) << "init tensor failed";
     return;
   }
   auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(INFO) << "init tensor failed";
     delete tensor_data;
     delete tensor_mean;
     delete tensor_var;
@@ -218,10 +210,10 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
   std::vector<lite::tensor::Tensor *> inputs = {tensor_data, tensor_scale, tensor_offset, tensor_mean, tensor_var};
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
   MS_LOG(INFO) << "initialize tensors";
   auto param = new (std::nothrow) BatchNormParameter();
   if (param == nullptr) {
     MS_LOG(INFO) << "new BatchNormParameter failed";
     for (auto tensor : outputs) {
       delete tensor;
     }
@@ -231,7 +223,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
   auto *batchnorm_kernel =
     new (std::nothrow) kernel::BatchNormOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
   if (batchnorm_kernel == nullptr) {
     MS_LOG(INFO) << "new kernel::BatchNorm_kernel failed";
     for (auto tensor : outputs) {
       delete tensor;
     }
@@ -245,11 +237,11 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
     input_tensor->MallocData(allocator);
   }
   MS_LOG(INFO) << "initialize sub_graph";
   std::vector<kernel::LiteKernel *> kernels{batchnorm_kernel};
   auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
   if (sub_graph == nullptr) {
     MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed";
     for (auto tensor : outputs) {
       delete tensor;
     }
@@ -258,7 +250,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
     return;
   }
   sub_graph->Init();
   MS_LOG(INFO) << "init tensors";
   memcpy(inputs[0]->Data(), input_data, input_size);
   memcpy(inputs[1]->Data(), scale_data, scale_size);
   memcpy(inputs[2]->Data(), offset_data, offset_size);
@@ -268,7 +260,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
   sub_graph->Run();
   auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->Data());
-  CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001);
+  CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001);
   for (auto tensor : inputs) {
     delete tensor;
   }
@@ -278,6 +270,5 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {
   delete param;
   delete batchnorm_kernel;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
@@ -18,70 +18,10 @@
 #include "utils/log_adapter.h"
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
 #include "mindspore/lite/src/common/file_utils.h"
 #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
 #include "mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h"

-template <typename T>
-void ConcatComputeByCPU_2input_dim4_axis3(const T *input0, const T *input1, T *output, std::vector<int> input_shape0,
-                                          std::vector<int> input_shape1, std::vector<int> output_shape,
-                                          const int axis) {
-  int postion, index0 = 0, index1 = 0;
-  for (int i = 0; i < output_shape[0]; i++) {
-    for (int j = 0; j < output_shape[1]; j++) {
-      for (int k = 0; k < output_shape[2]; k++) {
-        postion = i * output_shape[1] * output_shape[2] * output_shape[3] + j * output_shape[2] * output_shape[3] +
-                  k * output_shape[3];
-        for (int w = 0; w < output_shape[3]; w++) {
-          if (w < input_shape0[3] + input_shape1[3]) {
-            output[postion++] = (w < input_shape0[3]) ? input0[index0++] : input1[index1++];
-          } else {
-            for (int ind = input_shape0[3] + input_shape1[3]; ind < output_shape[3]; ind++) {
-              output[postion++] = 0;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template <typename T>
-void ConcatComputeByCPU_3input_dim4_axis3(T *input0, T *input1, T *input2, T *output, std::vector<int> input_shape0,
-                                          std::vector<int> input_shape1, std::vector<int> input_shape2,
-                                          std::vector<int> output_shape, const int axis) {
-  int postion, index0 = 0, index1 = 0, index2 = 0;
-  for (int i = 0; i < output_shape[0]; i++) {
-    for (int j = 0; j < output_shape[1]; j++) {
-      for (int k = 0; k < output_shape[2]; k++) {
-        postion = i * output_shape[1] * output_shape[2] * output_shape[3] + j * output_shape[2] * output_shape[3] +
-                  k * output_shape[3];
-        for (int w = 0; w < output_shape[3]; w++) {
-          if (w < input_shape0[3]) {
-            int align = UP_DIV(input_shape0[3], 4) * 4;
-            index0 = i * input_shape0[1] * input_shape0[2] * align + j * input_shape0[2] * align + k * align + w;
-            output[postion++] = input0[index0];
-          } else if (w >= input_shape0[3] && w < (input_shape0[3] + input_shape1[3])) {
-            int align = UP_DIV(input_shape1[3], 4) * 4;
-            index1 = i * input_shape1[1] * input_shape1[2] * align + j * input_shape1[2] * align + k * align + w -
-                     input_shape0[3];
-            output[postion++] = input1[index1];
-          } else if ((input_shape0[3] + input_shape1[3]) <= w &&
-                     w < (input_shape0[3] + input_shape1[3] + input_shape2[3])) {
-            int align = UP_DIV(input_shape2[3], 4) * 4;
-            index2 = i * input_shape2[1] * input_shape2[2] * align + j * input_shape2[2] * align + k * align + w -
-                     input_shape0[3] - input_shape1[3];
-            output[postion++] = input2[index2];
-          } else {
-            for (int ind = input_shape0[3] + input_shape1[3] + input_shape2[3]; ind < output_shape[3]; ind++) {
-              output[postion++] = 0;
-            }
-            break;
-          }
-        }
-      }
-    }
-  }
-}
-
 namespace mindspore {
 class TestConcatOpenCLfp32 : public mindspore::CommonTest {
  public:
@@ -100,17 +40,29 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
   }
 }

 TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
   MS_LOG(INFO) << "begin test";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->SetFp16Enable(true);
   ocl_runtime->Init();
   auto allocator = ocl_runtime->GetAllocator();
-  MS_LOG(INFO) << "init tensors";
-  constexpr int INPUT_NUM = 3;
-  std::array<std::vector<int>, INPUT_NUM> input_shapes = {std::vector<int>{1, 16, 256, 80},
-                                                          std::vector<int>{1, 16, 256, 80},
-                                                          std::vector<int>{1, 16, 256, 80}};
-  std::vector<int> output_shape = {1, 16, 256, 240};
+  // get the input from .bin
+  size_t input1_size, input2_size, input3_size, output_size;
+  std::string input1Ppath = "./test_data/concatfp16_input1.bin";
+  std::string input2Ppath = "./test_data/concatfp16_input2.bin";
+  std::string input3Ppath = "./test_data/concatfp16_input3.bin";
+  std::string correctOutputPath = "./test_data/concatfp16_output.bin";
+  auto input_data1 = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size));
+  auto input_data2 = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size));
+  auto input_data3 = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size));
+  auto correctOutput = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size));
+  MS_LOG(INFO) << " init tensors ";
+  constexpr int INPUT_NUM = 2;
+  std::array<std::vector<int>, INPUT_NUM> input_shapes = {std::vector<int>{1, 19, 19, 96},
+                                                          std::vector<int>{1, 19, 19, 96}};
+  std::vector<int> output_shape = {2, 19, 19, 96};
   auto data_type = kNumberTypeFloat16;
   auto tensor_type = schema::NodeType_ValueNode;
   std::vector<lite::tensor::Tensor *> inputs;
@@ -118,26 +70,26 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
     auto input_temp = new (std::nothrow) lite::tensor::Tensor(data_type, shape, schema::Format_NHWC4, tensor_type);
     inputs.push_back(input_temp);
     if (input_temp == nullptr) {
       MS_LOG(INFO) << "new input_tensor failed";
       return;
     }
   }
   auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(INFO) << "new output_tensor failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
     return;
   }
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
   MS_LOG(INFO) << "input_shapes size=: " << input_shapes.size();
   MS_LOG(INFO) << "initialize tensors";
   auto param = new (std::nothrow) ConcatParameter();
   if (param == nullptr) {
     MS_LOG(INFO) << "new ConcatParameter failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
@@ -146,11 +98,11 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
     }
     return;
   }
-  param->axis_ = 3;
+  param->axis_ = 0;
   auto *concat_kernel =
     new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
   if (concat_kernel == nullptr) {
     MS_LOG(INFO) << "new kernel::ConcatOpenCLKernel failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
@@ -165,12 +117,11 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
   for (auto &input_tensor : inputs) {
     input_tensor->MallocData(allocator);
   }
-  MS_LOG(INFO) << "initialize sub_graph";
+  MS_LOG(INFO) << " initialize sub_graph ";
   std::vector<kernel::LiteKernel *> kernels{concat_kernel};
   auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
   if (sub_graph == nullptr) {
     MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
@@ -182,33 +133,22 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
     return;
   }
   sub_graph->Init();
-  unsigned int seed = 123;
-  MS_LOG(INFO) << "initialize input data";
-  for (auto &input_tensor : inputs) {
-    auto input_data = reinterpret_cast<float16_t *>(input_tensor->Data());
-    for (int i = 0; i < input_tensor->ElementsNum(); ++i) {
-      input_data[i] = static_cast<float16_t>(rand_r(&seed) % 10 + 1);
-    }
-  }
-  // compute the result for CPU
-  auto *input_data0 = reinterpret_cast<float16_t *>(inputs[0]->Data());
-  auto *input_data1 = reinterpret_cast<float16_t *>(inputs[1]->Data());
-  std::vector<float16_t> output_data_cpu(output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]);
+  MS_LOG(INFO) << " initialize input data ";
   if (inputs.size() == 2) {
-    ConcatComputeByCPU_2input_dim4_axis3(input_data0, input_data1, output_data_cpu.data(), input_shapes[0],
-                                         input_shapes[1], output_shape, param->axis_);
-  }
-  if (inputs.size() == 3) {
-    auto *input_data2 = reinterpret_cast<float16_t *>(inputs[2]->Data());
-    ConcatComputeByCPU_3input_dim4_axis3(input_data0, input_data1, input_data2, output_data_cpu.data(),
-                                         input_shapes[0], input_shapes[1], input_shapes[2], output_shape,
-                                         param->axis_);
+    memcpy(inputs[0]->Data(), input_data1, input1_size);
+    memcpy(inputs[1]->Data(), input_data2, input2_size);
+  } else if (inputs.size() == 3) {
+    memcpy(inputs[0]->Data(), input_data1, input1_size);
+    memcpy(inputs[1]->Data(), input_data2, input2_size);
+    memcpy(inputs[2]->Data(), input_data3, input3_size);
+  } else {
+    MS_LOG(ERROR) << " input size must be 2 or 3";
   }
   std::cout << "==================output data================" << std::endl;
   sub_graph->Run();
   auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->Data());
-  CompareOutputData1(output_data_gpu, output_data_cpu.data(), output_tensor->ElementsNum(), 0.00001);
+  CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001);
   for (auto tensor : inputs) {
     delete tensor;
   }
@@ -218,47 +158,57 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
   delete param;
   delete concat_kernel;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
   MS_LOG(INFO) << "begin test";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
   auto allocator = ocl_runtime->GetAllocator();
-  MS_LOG(INFO) << "init tensors";
+  // get the input from .bin
+  size_t input1_size, input2_size, input3_size, output_size;
+  std::string input1Ppath = "./test_data/concat_input1.bin";
+  std::string input2Ppath = "./test_data/concat_input2.bin";
+  std::string input3Ppath = "./test_data/concat_input3.bin";
+  std::string correctOutputPath = "./test_data/concat_output.bin";
+  auto input_data1 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size));
+  auto input_data2 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size));
+  auto input_data3 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size));
+  auto correctOutput = reinterpret_cast<float *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size));
+  MS_LOG(INFO) << " init tensors ";
   constexpr int INPUT_NUM = 3;
   std::array<std::vector<int>, INPUT_NUM> input_shapes = {std::vector<int>{1, 16, 256, 80},
                                                           std::vector<int>{1, 16, 256, 80},
                                                           std::vector<int>{1, 16, 256, 80}};
-  std::vector<int> output_shape = {1, 16, 256, 240};
+  std::vector<int> output_shape = {1, 48, 256, 80};
   auto data_type = kNumberTypeFloat32;
   auto tensor_type = schema::NodeType_ValueNode;
   std::vector<lite::tensor::Tensor *> inputs;
   for (auto &shape : input_shapes) {
-    auto input_temp = new (std::nothrow) lite::tensor::Tensor(data_type, shape, schema::Format_NHWC4, tensor_type);
+    auto input_temp = new (std::nothrow) lite::tensor::Tensor(data_type, shape, schema::Format_NHWC, tensor_type);
     inputs.push_back(input_temp);
     if (input_temp == nullptr) {
       MS_LOG(INFO) << "new input_tensor failed";
       return;
     }
   }
-  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
+  auto *output_tensor =
+    new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(INFO) << "new output_tensor failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
     return;
   }
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
   MS_LOG(INFO) << "input_shapes size=: " << input_shapes.size();
   MS_LOG(INFO) << "initialize tensors";
   auto param = new (std::nothrow) ConcatParameter();
   if (param == nullptr) {
     MS_LOG(INFO) << "new ConcatParameter failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
@@ -267,11 +217,11 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
     }
     return;
   }
-  param->axis_ = 3;
+  param->axis_ = 1;
   auto *concat_kernel =
     new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
   if (concat_kernel == nullptr) {
     MS_LOG(INFO) << "new kernel::ConcatOpenCLKernel failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
@@ -287,11 +237,11 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
     input_tensor->MallocData(allocator);
   }
   MS_LOG(INFO) << "initialize sub_graph";
   std::vector<kernel::LiteKernel *> kernels{concat_kernel};
   auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
   if (sub_graph == nullptr) {
     MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed";
     for (auto tensor : inputs) {
       delete tensor;
     }
@@ -303,33 +253,22 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
     return;
   }
   sub_graph->Init();
-  unsigned int seed = 123;
-  MS_LOG(INFO) << "initialize input data";
-  for (auto &input_tensor : inputs) {
-    auto input_data = reinterpret_cast<float *>(input_tensor->Data());
-    for (int i = 0; i < input_tensor->ElementsNum(); ++i) {
-      input_data[i] = static_cast<float>(rand_r(&seed) % 10 + 1);
-    }
-  }
-  // compute the result for CPU
-  auto *input_data0 = reinterpret_cast<float *>(inputs[0]->Data());
-  auto *input_data1 = reinterpret_cast<float *>(inputs[1]->Data());
-  std::vector<float> output_data_cpu(output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]);
+  MS_LOG(INFO) << " initialize input data ";
   if (inputs.size() == 2) {
-    ConcatComputeByCPU_2input_dim4_axis3(input_data0, input_data1, output_data_cpu.data(), input_shapes[0],
-                                         input_shapes[1], output_shape, param->axis_);
-  }
-  if (inputs.size() == 3) {
-    auto *input_data2 = reinterpret_cast<float *>(inputs[2]->Data());
-    ConcatComputeByCPU_3input_dim4_axis3(input_data0, input_data1, input_data2, output_data_cpu.data(),
-                                         input_shapes[0], input_shapes[1], input_shapes[2], output_shape,
-                                         param->axis_);
+    memcpy(inputs[0]->Data(), input_data1, input1_size);
+    memcpy(inputs[1]->Data(), input_data2, input2_size);
+  } else if (inputs.size() == 3) {
+    memcpy(inputs[0]->Data(), input_data1, input1_size);
+    memcpy(inputs[1]->Data(), input_data2, input2_size);
+    memcpy(inputs[2]->Data(), input_data3, input3_size);
+  } else {
+    MS_LOG(ERROR) << " input size must be 2 or 3 ";
   }
   std::cout << "==================output data================" << std::endl;
   sub_graph->Run();
   auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->Data());
-  CompareOutputData1(output_data_gpu, output_data_cpu.data(), output_tensor->ElementsNum(), 0.00001);
+  CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001);
   for (auto tensor : inputs) {
     delete tensor;
   }
@@ -339,6 +278,5 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
   delete param;
   delete concat_kernel;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
@@ -41,44 +41,43 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
 }

 TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
   MS_LOG(INFO) << "begin test";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
   auto allocator = ocl_runtime->GetAllocator();
   MS_LOG(INFO) << "Read tensors from .bin";
-  std::vector<int> input_shape = {1, 256, 256, 48};
-  std::vector<int> output_shape = {1, 255, 255, 15};
-  std::vector<int> begin = {0, 1, 1, 7};
-  std::vector<int> size = {1, 255, 255, 15};
+  std::vector<int> input_shape = {1, 19, 19, 96};
+  std::vector<int> output_shape = {1, 10, 10, 13};
+  std::vector<int> begin = {0, 2, 3, 4};
+  std::vector<int> size = {1, 10, 10, 13};
   auto data_type = kNumberTypeFloat32;
   auto tensor_type = schema::NodeType_ValueNode;
   // get the input from .bin
   size_t input_size, output_size;
-  std::string input_path = "./test_data/in_datafp32.bin";
-  std::string output_path = "./test_data/out_datafp32.bin";
+  std::string input_path = "./test_data/in_slicefp32.bin";
+  std::string output_path = "./test_data/out_slicefp32.bin";
   auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
   auto correct_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));
-  MS_LOG(INFO) << "construct tensors";
+  MS_LOG(INFO) << " construct tensors ";
   lite::tensor::Tensor *tensor_data =
     new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
   if (tensor_data == nullptr) {
     MS_LOG(INFO) << "init tensor failed";
     return;
   }
   auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
   if (output_tensor == nullptr) {
     delete tensor_data;
     MS_LOG(INFO) << "init tensor failed";
     return;
   }
   std::vector<lite::tensor::Tensor *> inputs = {tensor_data};
   std::vector<lite::tensor::Tensor *> outputs = {output_tensor};
   MS_LOG(INFO) << "setting SliceParameter";
   auto param = new (std::nothrow) SliceParameter();
   if (param == nullptr) {
     for (auto tensor : inputs) {
@@ -87,7 +86,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
     for (auto tensor : outputs) {
       delete tensor;
     }
     MS_LOG(INFO) << "new SliceParameter failed";
     return;
   }
   for (int i = 0; i < input_shape.size(); i++) {
@@ -105,7 +104,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
       delete tensor;
     }
     delete param;
     MS_LOG(INFO) << "new kernel::slice_kernel failed";
     return;
   }
   slice_kernel->Init();
@@ -115,7 +114,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
     input_tensor->MallocData(allocator);
   }
   MS_LOG(INFO) << "initialize sub_graph";
   std::vector<kernel::LiteKernel *> kernels{slice_kernel};
   auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
   if (sub_graph == nullptr) {
@@ -127,12 +126,12 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
     }
     delete param;
     delete slice_kernel;
     MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed";
     return;
   }
   sub_graph->Init();
   MS_LOG(INFO) << "init tensors";
   memcpy(inputs[0]->Data(), input_data, input_size);
   std::cout << "==================output data================" << std::endl;
@@ -148,16 +147,15 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
   }
   delete slice_kernel;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
   MS_LOG(INFO) << "begin test";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->SetFp16Enable(true);
   ocl_runtime->Init();
   auto allocator = ocl_runtime->GetAllocator();
   MS_LOG(INFO) << "Read tensors from .bin";
   std::vector<int> input_shape = {1, 256, 256, 48};
   std::vector<int> output_shape = {1, 255, 255, 15};
   std::vector<int> begin = {0, 1, 1, 7};
@@ -172,24 +170,24 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
   auto input_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
   auto correct_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));
   MS_LOG(INFO) << "construct tensors";
   lite::tensor::Tensor *tensor_data =
     new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
   if (tensor_data == nullptr) {
     MS_LOG(INFO) << "init tensor failed";
     return;
   }
   auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
   if (output_tensor == nullptr) {
     delete tensor_data;
     MS_LOG(INFO) << "init tensor failed";
     return;
   }
   std::vector<lite::tensor::Tensor *> inputs = {tensor_data};
   std::vector<lite::tensor::Tensor *> outputs = {output_tensor};
   MS_LOG(INFO) << "setting SliceParameter";
   auto param = new (std::nothrow) SliceParameter();
   if (param == nullptr) {
     for (auto tensor : inputs) {
@@ -198,10 +196,10 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
     for (auto tensor : outputs) {
       delete tensor;
     }
     MS_LOG(INFO) << "new SliceParameter failed";
     return;
   }
-  for (int i = 0; i < 4; i++) {
+  for (int i = 0; i < input_shape.size(); i++) {
     param->begin_[i] = begin[i];
     param->size_[i] = size[i];
   }
@@ -216,7 +214,7 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
       delete tensor;
     }
     delete param;
     MS_LOG(INFO) << "new kernel::slice_kernel failed";
     return;
   }
   slice_kernel->Init();
@@ -226,7 +224,7 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
     input_tensor->MallocData(allocator);
   }
   MS_LOG(INFO) << "initialize sub_graph";
   std::vector<kernel::LiteKernel *> kernels{slice_kernel};
   auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
   if (sub_graph == nullptr) {
@@ -238,12 +236,12 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
   }
   delete param;
   delete slice_kernel;
   MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed";
   return;
   }
   sub_graph->Init();
   MS_LOG(INFO) << "init tensors";
   memcpy(inputs[0]->Data(), input_data, input_size);
   std::cout << "==================output data================" << std::endl;
@@ -259,6 +257,5 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
   }
   delete slice_kernel;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
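For readers unfamiliar with the Slice parameters exercised above: begin_ and size_ pick a sub-tensor per NHWC dimension, e.g. begin = {0, 2, 3, 4} with size = {1, 10, 10, 13} in the fp32 test. The following CPU reference is only an illustration of that semantics under standard row-major NHWC indexing; it is not taken from the kernel or the test data.

#include <vector>

// Illustrative-only CPU slice for NHWC tensors:
// out[n][h][w][c] = in[n + begin[0]][h + begin[1]][w + begin[2]][c + begin[3]],
// with the output extents given by size[].
std::vector<float> SliceNHWC(const std::vector<float> &in, const int in_shape[4],
                             const int begin[4], const int size[4]) {
  std::vector<float> out(size[0] * size[1] * size[2] * size[3]);
  int o = 0;
  for (int n = 0; n < size[0]; ++n)
    for (int h = 0; h < size[1]; ++h)
      for (int w = 0; w < size[2]; ++w)
        for (int c = 0; c < size[3]; ++c) {
          int i = (((n + begin[0]) * in_shape[1] + (h + begin[1])) * in_shape[2] + (w + begin[2])) * in_shape[3] +
                  (c + begin[3]);
          out[o++] = in[i];
        }
  return out;
}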