Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
b022e3d1
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b022e3d1
编写于
8月 18, 2020
作者:
P
Pengyongrong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add new ops named slice
add ops supported fp16
上级
b4bc1deb
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
450 addition
and
82 deletion
+450
-82
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
+7
-6
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl
+11
-11
mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl
mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl
+13
-16
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
+6
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
+6
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+2
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
+7
-4
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
...lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
+139
-17
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
...re/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
+139
-14
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
...ore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
+120
-6
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
浏览文件 @
b022e3d1
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
INT4
int4
#
define
INT2
int2
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
...
...
@@ -11,16 +12,16 @@ __kernel void batch_normalization(__read_only image2d_t input, __read_only image
if
(
X
>=
input_shape.y
|
| Y >= input_shape.z |
|
Z
>=
input_shape.w
)
{
return
;
}
FLT4
result
=
read_imagef
(
input,
smp_none,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
)))
;
FLT4
result
=
READ_IMAGE
(
input,
smp_none,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
)))
;
FLT4
result_mean
=
read_imagef
(
mean,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_var
=
read_imagef
(
variance,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_scale
=
read_imagef
(
scale,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_offset
=
read_imagef
(
offset,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_mean
=
READ_IMAGE
(
mean,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_var
=
READ_IMAGE
(
variance,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_scale
=
READ_IMAGE
(
scale,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_offset
=
READ_IMAGE
(
offset,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
result.x
=
result_scale.x
*
((
result.x
-
result_mean.x
)
/
sqrt
(
result_var.x
+
epsilon
))
+
result_offset.x
;
result.y
=
result_scale.y
*
((
result.y
-
result_mean.y
)
/
sqrt
(
result_var.y
+
epsilon
))
+
result_offset.y
;
result.z
=
result_scale.z
*
((
result.z
-
result_mean.z
)
/
sqrt
(
result_var.z
+
epsilon
))
+
result_offset.z
;
result.w
=
result_scale.w
*
((
result.w
-
result_mean.w
)
/
sqrt
(
result_var.w
+
epsilon
))
+
result_offset.w
;
write_imagef
(
output,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
))
,
result
)
;
}
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl
浏览文件 @
b022e3d1
//
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
Concat
(
__read_only
image2d_t
input0,
__read_only
image2d_t
input1,
__write_only
image2d_t
output,
...
...
@@ -10,11 +10,11 @@ __kernel void Concat(__read_only image2d_t input0, __read_only image2d_t input1,
return;
}
if (Z < input_channels.x) {
FLT4 result =
read_imagef
(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X)));
write_imagef
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
FLT4 result =
READ_IMAGE
(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X)));
WRITE_IMAGE
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
} else {
FLT4 result =
read_imagef
(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X)));
write_imagef
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
FLT4 result =
READ_IMAGE
(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X)));
WRITE_IMAGE
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
}
...
...
@@ -27,14 +27,14 @@ __kernel void Concat3input(__read_only image2d_t input0, __read_only image2d_t i
return
;
}
if
(
Z
<
input_channels.x
)
{
FLT4
result0
=
read_imagef
(
input0,
smp_none,
(
int2
)((
Y
)
*input_channels.x
+
Z,
(
X
)))
;
write_imagef
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result0
)
;
FLT4
result0
=
READ_IMAGE
(
input0,
smp_none,
(
int2
)((
Y
)
*input_channels.x
+
Z,
(
X
)))
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result0
)
;
}
else
if
(
Z
<
(
input_channels.x
+
input_channels.y
))
{
FLT4
result1
=
read_imagef
(
input1,
smp_none,
(
int2
)((
Y
)
*input_channels.y
+
Z
-
input_channels.x,
(
X
)))
;
write_imagef
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result1
)
;
FLT4
result1
=
READ_IMAGE
(
input1,
smp_none,
(
int2
)((
Y
)
*input_channels.y
+
Z
-
input_channels.x,
(
X
)))
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result1
)
;
}
else
{
FLT4
result2
=
read_imagef
(
input2,
smp_none,
(
int2
)((
Y
)
*input_channels.z
+
Z
-
input_channels.x
-
input_channels.y,
(
X
)))
;
write_imagef
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result2
)
;
READ_IMAGE
(
input2,
smp_none,
(
int2
)((
Y
)
*input_channels.z
+
Z
-
input_channels.x
-
input_channels.y,
(
X
)))
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result2
)
;
}
}
mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl
浏览文件 @
b022e3d1
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
INT2
int2
#
define
INT4
int4
#
define
FLT4
float4
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
slice
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
INT4
input_shape,
INT4
out_shape,
INT4
begin,
INT2
sharedNoUpdiv
)
{
...
...
@@ -12,46 +12,43 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output,
FLT4
result
;
if
(
sharedNoUpdiv.x
%
4
==
0
)
{
for
(
int
i
=
0
; i < out_shape.w; i++) {
result
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
i
+
begin.w
)
,
(
X
+
begin.y
)))
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i,
(
X
))
,
result
)
;
result
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
i
+
begin.w
)
,
(
X
+
begin.y
)))
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i,
(
X
))
,
result
)
;
}
}
else
{
int
begin_postion
=
sharedNoUpdiv.y
%
4
;
FLT4
first
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
begin.w,
(
X
+
begin.y
)))
;
FLT4
first
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
begin.w,
(
X
+
begin.y
)))
;
if
(
begin_postion
==
1
)
{
for
(
int
i
=
1
; i <= out_shape.w; i++) {
FLT4
second
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
FLT4
second
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
result.x
=
first.y
;
result.y
=
first.z
;
result.z
=
first.w
;
result.w
=
second.x
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
first.y
=
second.y
;
first.z
=
second.z
;
first.w
=
second.w
;
}
}
else
if
(
begin_postion
==
2
)
{
for
(
int
i
=
1
; i <= out_shape.w; i++) {
FLT4
second
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
FLT4
second
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
result.x
=
first.z
;
result.y
=
first.w
;
result.z
=
second.x
;
result.w
=
second.y
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
first.z
=
second.z
;
first.w
=
second.w
;
}
}
else
{
for
(
int
i
=
1
; i <= out_shape.w; i++) {
FLT4
second
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
FLT4
second
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
result.x
=
first.w
;
result.y
=
second.x
;
result.z
=
second.y
;
result.w
=
second.z
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
first.w
=
second.w
;
}
}
...
...
@@ -64,18 +61,18 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output,
result_fill0.y
=
0
;
result_fill0.z
=
0
;
result_fill0.w
=
0
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
}
else
if
(
size
==
2
)
{
result_fill0.x
=
result.x
;
result_fill0.y
=
result.y
;
result_fill0.z
=
0
;
result_fill0.w
=
0
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
}
else
if
(
size
==
3
)
{
result_fill0.x
=
result.x
;
result_fill0.y
=
result.y
;
result_fill0.z
=
result.z
;
result_fill0.w
=
0
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
}
}
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
浏览文件 @
b022e3d1
...
...
@@ -38,11 +38,12 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_siz
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
...
...
@@ -148,4 +149,5 @@ kernel::LiteKernel *OpenCLBatchnormKernelCreator(const std::vector<lite::tensor:
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_BatchNorm
,
OpenCLBatchnormKernelCreator
);
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_BatchNorm
,
OpenCLBatchnormKernelCreator
);
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
浏览文件 @
b022e3d1
...
...
@@ -38,11 +38,12 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
...
...
@@ -225,4 +226,5 @@ kernel::LiteKernel *OpenCLConcatKernelCreator(const std::vector<lite::tensor::Te
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Concat
,
OpenCLConcatKernelCreator
);
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_Concat
,
OpenCLConcatKernelCreator
);
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
浏览文件 @
b022e3d1
...
...
@@ -175,4 +175,6 @@ kernel::LiteKernel *OpenCLMatMulKernelCreator(const std::vector<lite::tensor::Te
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_MatMul
,
OpenCLMatMulKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_FullConnection
,
OpenCLMatMulKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_MatMul
,
OpenCLMatMulKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_FullConnection
,
OpenCLMatMulKernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
浏览文件 @
b022e3d1
...
...
@@ -38,11 +38,12 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
...
...
@@ -143,4 +144,6 @@ kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector<lite::tensor::Ten
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Slice
,
OpenCLSliceKernelCreator
);
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_Slice
,
OpenCLSliceKernelCreator
);
}
// namespace mindspore::kernel
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
浏览文件 @
b022e3d1
...
...
@@ -23,9 +23,13 @@
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h"
namespace
mindspore
{
class
TestBatchnormOpenCL
:
public
mindspore
::
CommonTest
{
class
TestBatchnormOpenCL
fp32
:
public
mindspore
::
CommonTest
{
public:
TestBatchnormOpenCL
()
{}
TestBatchnormOpenCLfp32
()
{}
};
class
TestBatchnormOpenCLfp16
:
public
mindspore
::
CommonTest
{
public:
TestBatchnormOpenCLfp16
()
{}
};
template
<
typename
T
>
...
...
@@ -35,30 +39,153 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
ASSERT_LE
(
abs
,
err_bound
);
}
}
TEST_F
(
TestBatchnormOpenCLfp16
,
Batchnormfp16input_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
SetFp16Enable
(
true
);
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"Read tensors from .bin"
;
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
48
};
std
::
vector
<
int
>
output_shape
=
{
1
,
256
,
256
,
48
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/batchnorm_in_datafp16.bin"
;
std
::
string
mean_path
=
"./test_data/batchnorm_meanfp16.bin"
;
std
::
string
var_path
=
"./test_data/batchnorm_varfp16.bin"
;
std
::
string
offset_path
=
"./test_data/batchnorm_offsetfp16.bin"
;
std
::
string
scale_path
=
"./test_data/batchnorm_scalefp16.bin"
;
std
::
string
output_path
=
"./test_data/batchnorm_out_datafp16.bin"
;
auto
input_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
size_t
mean_size
,
var_size
,
scale_size
,
offset_size
;
auto
mean_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
mean_path
.
c_str
(),
&
mean_size
));
auto
var_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
var_path
.
c_str
(),
&
var_size
));
auto
scale_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
scale_path
.
c_str
(),
&
scale_size
));
auto
offset_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
offset_path
.
c_str
(),
&
offset_size
));
MS_LOG
(
INFO
)
<<
"construct tensors"
;
lite
::
tensor
::
Tensor
*
tensor_data
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_mean
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_var
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_scale
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_offset
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
if
(
tensor_data
==
nullptr
||
tensor_mean
==
nullptr
||
tensor_var
==
nullptr
||
tensor_scale
==
nullptr
||
tensor_offset
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
return
;
}
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC4
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
delete
tensor_data
;
delete
tensor_mean
;
delete
tensor_var
;
delete
tensor_scale
;
delete
tensor_offset
;
return
;
}
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
=
{
tensor_data
,
tensor_scale
,
tensor_offset
,
tensor_mean
,
tensor_var
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
MS_LOG
(
INFO
)
<<
"initialize tensors"
;
auto
param
=
new
(
std
::
nothrow
)
BatchNormParameter
();
if
(
param
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new BatchNormParameter failed"
;
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
return
;
}
param
->
epsilon_
=
pow
(
10
,
-
5
);
auto
*
batchnorm_kernel
=
new
(
std
::
nothrow
)
kernel
::
BatchNormOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
batchnorm_kernel
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::BatchNorm_kernel failed"
;
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
return
;
}
batchnorm_kernel
->
Init
();
TEST_F
(
TestBatchnormOpenCL
,
Batchnorminput_dim4
)
{
// to do allocate memory for inputs and outputs
for
(
auto
&
input_tensor
:
inputs
)
{
input_tensor
->
MallocData
(
allocator
);
}
MS_LOG
(
INFO
)
<<
"initialize sub_graph"
;
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
batchnorm_kernel
};
auto
*
sub_graph
=
new
(
std
::
nothrow
)
kernel
::
SubGraphOpenCLKernel
(
inputs
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
sub_graph
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::SubGraphOpenCLKernel failed"
;
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
batchnorm_kernel
;
return
;
}
sub_graph
->
Init
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
memcpy
(
inputs
[
0
]
->
Data
(),
input_data
,
input_size
);
memcpy
(
inputs
[
1
]
->
Data
(),
scale_data
,
scale_size
);
memcpy
(
inputs
[
2
]
->
Data
(),
offset_data
,
offset_size
);
memcpy
(
inputs
[
3
]
->
Data
(),
mean_data
,
mean_size
);
memcpy
(
inputs
[
4
]
->
Data
(),
var_data
,
var_size
);
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float16_t
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
output_data_gpu
,
correct_data
,
output_tensor
->
ElementsNum
(),
0.0001
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
batchnorm_kernel
;
delete
sub_graph
;
}
TEST_F
(
TestBatchnormOpenCLfp32
,
Batchnormfp32input_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"Read tensors from .bin"
;
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
16
};
std
::
vector
<
int
>
output_shape
=
{
1
,
256
,
256
,
16
};
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
47
};
std
::
vector
<
int
>
output_shape
=
{
1
,
256
,
256
,
47
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/in_data.bin"
;
std
::
string
mean_path
=
"./test_data/mean.bin"
;
std
::
string
var_path
=
"./test_data/var.bin"
;
std
::
string
output_path
=
"./test_data/out_data.bin"
;
std
::
string
input_path
=
"./test_data/batchnorm_in_datafp32.bin"
;
std
::
string
mean_path
=
"./test_data/batchnorm_meanfp32.bin"
;
std
::
string
var_path
=
"./test_data/batchnorm_varfp32.bin"
;
std
::
string
offset_path
=
"./test_data/batchnorm_offsetfp32.bin"
;
std
::
string
scale_path
=
"./test_data/batchnorm_scalefp32.bin"
;
std
::
string
output_path
=
"./test_data/batchnorm_out_datafp32.bin"
;
auto
input_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
size_t
mean_size
,
var_size
;
size_t
mean_size
,
var_size
,
scale_size
,
offset_size
;
auto
mean_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
mean_path
.
c_str
(),
&
mean_size
));
auto
var_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
var_path
.
c_str
(),
&
var_size
));
auto
scale_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
scale_path
.
c_str
(),
&
scale_size
));
auto
offset_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
offset_path
.
c_str
(),
&
offset_size
));
MS_LOG
(
INFO
)
<<
"construct tensors"
;
lite
::
tensor
::
Tensor
*
tensor_data
=
...
...
@@ -131,14 +258,9 @@ TEST_F(TestBatchnormOpenCL, Batchnorminput_dim4) {
}
sub_graph
->
Init
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
std
::
cout
<<
"init tensors"
<<
std
::
endl
;
memcpy
(
inputs
[
0
]
->
Data
(),
input_data
,
input_size
);
auto
&
temp
=
inputs
[
1
];
auto
tensor_temp
=
reinterpret_cast
<
float
*>
(
temp
->
Data
());
int
UPDIV_tensor_scale
=
UP_DIV
(
tensor_scale
->
ElementsNum
(),
C4NUM
)
*
4
;
for
(
int
i
=
0
;
i
<
UPDIV_tensor_scale
;
++
i
)
{
tensor_temp
[
i
]
=
static_cast
<
float
>
(
1
);
}
memcpy
(
inputs
[
1
]
->
Data
(),
scale_data
,
scale_size
);
memcpy
(
inputs
[
2
]
->
Data
(),
offset_data
,
offset_size
);
memcpy
(
inputs
[
3
]
->
Data
(),
mean_data
,
mean_size
);
memcpy
(
inputs
[
4
]
->
Data
(),
var_data
,
var_size
);
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
浏览文件 @
b022e3d1
...
...
@@ -21,9 +21,10 @@
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h"
void
ConcatComputeByCPU_2input_dim4_axis3
(
const
float
*
input0
,
const
float
*
input1
,
float
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape1
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
template
<
typename
T
>
void
ConcatComputeByCPU_2input_dim4_axis3
(
const
T
*
input0
,
const
T
*
input1
,
T
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape1
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
int
postion
,
index0
=
0
,
index1
=
0
;
for
(
int
i
=
0
;
i
<
output_shape
[
0
];
i
++
)
{
for
(
int
j
=
0
;
j
<
output_shape
[
1
];
j
++
)
{
...
...
@@ -43,10 +44,10 @@ void ConcatComputeByCPU_2input_dim4_axis3(const float *input0, const float *inpu
}
}
}
void
ConcatComputeByCPU_3input_dim4_axis3
(
float
*
input0
,
float
*
input1
,
float
*
input2
,
float
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape1
,
std
::
vector
<
int
>
input_shape
2
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
template
<
typename
T
>
void
ConcatComputeByCPU_3input_dim4_axis3
(
T
*
input0
,
T
*
input1
,
T
*
input2
,
T
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape
1
,
std
::
vector
<
int
>
input_shape2
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
int
postion
,
index0
=
0
,
index1
=
0
,
index2
=
0
;
for
(
int
i
=
0
;
i
<
output_shape
[
0
];
i
++
)
{
for
(
int
j
=
0
;
j
<
output_shape
[
1
];
j
++
)
{
...
...
@@ -82,9 +83,13 @@ void ConcatComputeByCPU_3input_dim4_axis3(float *input0, float *input1, float *i
}
namespace
mindspore
{
class
TestConcatOpenCL
:
public
mindspore
::
CommonTest
{
class
TestConcatOpenCLfp32
:
public
mindspore
::
CommonTest
{
public:
TestConcatOpenCLfp32
()
{}
};
class
TestConcatOpenCLfp16
:
public
mindspore
::
CommonTest
{
public:
TestConcatOpenCL
()
{}
TestConcatOpenCL
fp16
()
{}
};
template
<
typename
T
>
...
...
@@ -94,18 +99,138 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
ASSERT_LE
(
abs
,
err_bound
);
}
}
TEST_F
(
TestConcatOpenCLfp16
,
ConcatFp16_2input_dim4_axis3
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
SetFp16Enable
(
true
);
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
constexpr
int
INPUT_NUM
=
3
;
std
::
array
<
std
::
vector
<
int
>
,
INPUT_NUM
>
input_shapes
=
{
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
}};
std
::
vector
<
int
>
output_shape
=
{
1
,
16
,
256
,
240
};
auto
data_type
=
kNumberTypeFloat16
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
;
for
(
auto
&
shape
:
input_shapes
)
{
auto
input_temp
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
shape
,
schema
::
Format_NHWC4
,
tensor_type
);
inputs
.
push_back
(
input_temp
);
if
(
input_temp
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new input_tensor failed"
;
return
;
}
}
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC4
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new output_tensor failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
return
;
}
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
MS_LOG
(
INFO
)
<<
"input_shapes size=: "
<<
input_shapes
.
size
();
MS_LOG
(
INFO
)
<<
"initialize tensors"
;
auto
param
=
new
(
std
::
nothrow
)
ConcatParameter
();
if
(
param
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new ConcatParameter failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
return
;
}
param
->
axis_
=
3
;
auto
*
concat_kernel
=
new
(
std
::
nothrow
)
kernel
::
ConcatOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
concat_kernel
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::ConcatOpenCLKernel failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
return
;
}
concat_kernel
->
Init
();
// to do allocate memory for inputs and outputs
for
(
auto
&
input_tensor
:
inputs
)
{
input_tensor
->
MallocData
(
allocator
);
}
MS_LOG
(
INFO
)
<<
"initialize sub_graph"
;
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
concat_kernel
};
auto
*
sub_graph
=
new
(
std
::
nothrow
)
kernel
::
SubGraphOpenCLKernel
(
inputs
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
sub_graph
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::SubGraphOpenCLKernel failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
concat_kernel
;
return
;
}
sub_graph
->
Init
();
unsigned
int
seed
=
123
;
MS_LOG
(
INFO
)
<<
"initialize input data"
;
for
(
auto
&
input_tensor
:
inputs
)
{
auto
input_data
=
reinterpret_cast
<
float16_t
*>
(
input_tensor
->
Data
());
for
(
int
i
=
0
;
i
<
input_tensor
->
ElementsNum
();
++
i
)
{
input_data
[
i
]
=
static_cast
<
float16_t
>
(
rand_r
(
&
seed
)
%
10
+
1
);
}
}
// compute the result for CPU
auto
*
input_data0
=
reinterpret_cast
<
float16_t
*>
(
inputs
[
0
]
->
Data
());
auto
*
input_data1
=
reinterpret_cast
<
float16_t
*>
(
inputs
[
1
]
->
Data
());
std
::
vector
<
float16_t
>
output_data_cpu
(
output_shape
[
0
]
*
output_shape
[
1
]
*
output_shape
[
2
]
*
output_shape
[
3
]);
if
(
inputs
.
size
()
==
2
)
{
ConcatComputeByCPU_2input_dim4_axis3
(
input_data0
,
input_data1
,
output_data_cpu
.
data
(),
input_shapes
[
0
],
input_shapes
[
1
],
output_shape
,
param
->
axis_
);
}
if
(
inputs
.
size
()
==
3
)
{
auto
*
input_data2
=
reinterpret_cast
<
float16_t
*>
(
inputs
[
2
]
->
Data
());
ConcatComputeByCPU_3input_dim4_axis3
(
input_data0
,
input_data1
,
input_data2
,
output_data_cpu
.
data
(),
input_shapes
[
0
],
input_shapes
[
1
],
input_shapes
[
2
],
output_shape
,
param
->
axis_
);
}
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float16_t
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
output_data_gpu
,
output_data_cpu
.
data
(),
output_tensor
->
ElementsNum
(),
0.00001
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
concat_kernel
;
delete
sub_graph
;
}
TEST_F
(
TestConcatOpenCL
,
ConcatFp32_2input_dim4_axis3
)
{
TEST_F
(
TestConcatOpenCL
fp32
,
ConcatFp32_2input_dim4_axis3
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
constexpr
int
INPUT_NUM
=
2
;
std
::
array
<
std
::
vector
<
int
>
,
INPUT_NUM
>
input_shapes
=
{
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
}};
std
::
vector
<
int
>
output_shape
=
{
1
,
16
,
256
,
16
0
};
constexpr
int
INPUT_NUM
=
3
;
std
::
array
<
std
::
vector
<
int
>
,
INPUT_NUM
>
input_shapes
=
{
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
}};
std
::
vector
<
int
>
output_shape
=
{
1
,
16
,
256
,
24
0
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
;
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
浏览文件 @
b022e3d1
...
...
@@ -23,9 +23,13 @@
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h"
namespace
mindspore
{
class
TestSliceOpenCL
:
public
mindspore
::
CommonTest
{
class
TestSliceOpenCL
fp32
:
public
mindspore
::
CommonTest
{
public:
TestSliceOpenCL
()
{}
TestSliceOpenCLfp32
()
{}
};
class
TestSliceOpenCLfp16
:
public
mindspore
::
CommonTest
{
public:
TestSliceOpenCLfp16
()
{}
};
template
<
typename
T
>
...
...
@@ -36,7 +40,7 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
}
}
TEST_F
(
TestSliceOpenCL
,
Slice
input_dim4
)
{
TEST_F
(
TestSliceOpenCL
fp32
,
Slicefp32
input_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
...
...
@@ -52,8 +56,8 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/in_data.bin"
;
std
::
string
output_path
=
"./test_data/out_data.bin"
;
std
::
string
input_path
=
"./test_data/in_data
fp32
.bin"
;
std
::
string
output_path
=
"./test_data/out_data
fp32
.bin"
;
auto
input_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
...
...
@@ -86,7 +90,7 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
MS_LOG
(
INFO
)
<<
"new SliceParameter failed"
;
return
;
}
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
for
(
int
i
=
0
;
i
<
input_shape
.
size
()
;
i
++
)
{
param
->
begin_
[
i
]
=
begin
[
i
];
param
->
size_
[
i
]
=
size
[
i
];
}
...
...
@@ -145,4 +149,114 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
delete
slice_kernel
;
delete
sub_graph
;
}
// Runs the OpenCL slice kernel with fp16 enabled on a 4-D NHWC input and
// compares the kernel output against reference data loaded from disk.
//
// Flow: enable fp16 on the OpenCL runtime -> load input/expected .bin files ->
// build input/output tensors and the SliceParameter -> wrap the slice kernel in
// a SubGraphOpenCLKernel -> run -> compare with CompareOutputData1.
TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
  MS_LOG(INFO) << "begin test";
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  ocl_runtime->SetFp16Enable(true);
  ocl_runtime->Init();
  auto allocator = ocl_runtime->GetAllocator();

  MS_LOG(INFO) << "Read tensors from .bin";
  std::vector<int> input_shape = {1, 256, 256, 48};
  std::vector<int> output_shape = {1, 255, 255, 15};
  std::vector<int> begin = {0, 1, 1, 7};
  std::vector<int> size = {1, 255, 255, 15};
  auto data_type = kNumberTypeFloat16;
  auto tensor_type = schema::NodeType_ValueNode;

  // get the input from .bin
  // NOTE(review): the fp32 test in this file reads in_datafp32.bin /
  // out_datafp32.bin; these unsuffixed files must hold float16 data - confirm.
  size_t input_size = 0;
  size_t output_size = 0;
  std::string input_path = "./test_data/in_data.bin";
  std::string output_path = "./test_data/out_data.bin";
  auto input_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  auto correct_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));
  // Fail up front instead of dereferencing a null buffer in memcpy /
  // CompareOutputData1 below (presumably ReadFile yields nullptr when the file
  // cannot be read - confirm). Nothing has been allocated by the test yet.
  if (input_data == nullptr || correct_data == nullptr) {
    FAIL() << "read test data failed: " << input_path << ", " << output_path;
  }

  MS_LOG(INFO) << "construct tensors";
  lite::tensor::Tensor *tensor_data =
    new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
  if (tensor_data == nullptr) {
    MS_LOG(INFO) << "init tensor failed";
    return;
  }
  auto *output_tensor =
    new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
  if (output_tensor == nullptr) {
    delete tensor_data;
    MS_LOG(INFO) << "init tensor failed";
    return;
  }
  std::vector<lite::tensor::Tensor *> inputs = {tensor_data};
  std::vector<lite::tensor::Tensor *> outputs = {output_tensor};

  // Single place that releases every tensor owned by this test; used by all
  // failure paths below and by the final teardown.
  auto free_tensors = [&inputs, &outputs]() {
    for (auto tensor : inputs) {
      delete tensor;
    }
    for (auto tensor : outputs) {
      delete tensor;
    }
  };

  MS_LOG(INFO) << "setting SliceParameter";
  auto param = new (std::nothrow) SliceParameter();
  if (param == nullptr) {
    free_tensors();
    MS_LOG(INFO) << "new SliceParameter failed";
    return;
  }
  // Bound the copy by the tensor rank rather than a literal 4, matching the
  // fp32 test in this file.
  for (size_t i = 0; i < input_shape.size(); i++) {
    param->begin_[i] = begin[i];
    param->size_[i] = size[i];
  }

  auto *slice_kernel =
    new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
  if (slice_kernel == nullptr) {
    free_tensors();
    delete param;
    MS_LOG(INFO) << "new kernel::slice_kernel failed";
    return;
  }
  slice_kernel->Init();

  // to do allocate memory for inputs and outputs
  for (auto &input_tensor : inputs) {
    input_tensor->MallocData(allocator);
  }

  MS_LOG(INFO) << "initialize sub_graph";
  std::vector<kernel::LiteKernel *> kernels{slice_kernel};
  auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
  if (sub_graph == nullptr) {
    free_tensors();
    delete param;
    delete slice_kernel;
    MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed";
    return;
  }
  sub_graph->Init();

  MS_LOG(INFO) << "init tensors";
  memcpy(inputs[0]->Data(), input_data, input_size);

  std::cout << "==================output data================" << std::endl;
  sub_graph->Run();

  auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->Data());
  CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001);

  // Teardown order kept as in the original: tensors, then kernel, then sub-graph.
  free_tensors();
  delete slice_kernel;
  delete sub_graph;
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录