Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
b5ed5466
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b5ed5466
编写于
4年前
作者:
M
mindspore-ci-bot
提交者:
Gitee
4年前
浏览文件
操作
浏览文件
下载
差异文件
!5170 [MS][LITE][DEVELOP]concat slice ops support fp16
Merge pull request !5170 from pengyongrong/concat
上级
9ab4f123
b022e3d1
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
450 addition
and
82 deletion
+450
-82
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
+7
-6
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl
+11
-11
mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl
mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl
+13
-16
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
+6
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
+6
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+2
-0
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
+7
-4
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
...lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
+139
-17
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
...re/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
+139
-14
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
...ore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
+120
-6
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/batchnorm.cl
浏览文件 @
b5ed5466
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
INT4
int4
#
define
INT2
int2
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
...
...
@@ -11,16 +12,16 @@ __kernel void batch_normalization(__read_only image2d_t input, __read_only image
if
(
X
>=
input_shape.y
|
| Y >= input_shape.z |
|
Z
>=
input_shape.w
)
{
return
;
}
FLT4
result
=
read_imagef
(
input,
smp_none,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
)))
;
FLT4
result
=
READ_IMAGE
(
input,
smp_none,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
)))
;
FLT4
result_mean
=
read_imagef
(
mean,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_var
=
read_imagef
(
variance,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_scale
=
read_imagef
(
scale,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_offset
=
read_imagef
(
offset,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_mean
=
READ_IMAGE
(
mean,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_var
=
READ_IMAGE
(
variance,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_scale
=
READ_IMAGE
(
scale,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
FLT4
result_offset
=
READ_IMAGE
(
offset,
smp_none,
(
int2
)((
Z
)
,
(
0
)))
;
result.x
=
result_scale.x
*
((
result.x
-
result_mean.x
)
/
sqrt
(
result_var.x
+
epsilon
))
+
result_offset.x
;
result.y
=
result_scale.y
*
((
result.y
-
result_mean.y
)
/
sqrt
(
result_var.y
+
epsilon
))
+
result_offset.y
;
result.z
=
result_scale.z
*
((
result.z
-
result_mean.z
)
/
sqrt
(
result_var.z
+
epsilon
))
+
result_offset.z
;
result.w
=
result_scale.w
*
((
result.w
-
result_mean.w
)
/
sqrt
(
result_var.w
+
epsilon
))
+
result_offset.w
;
write_imagef
(
output,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*input_shape.w
+
Z,
(
X
))
,
result
)
;
}
This diff is collapsed.
Click to expand it.
mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl
浏览文件 @
b5ed5466
//
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
Concat
(
__read_only
image2d_t
input0,
__read_only
image2d_t
input1,
__write_only
image2d_t
output,
...
...
@@ -10,11 +10,11 @@ __kernel void Concat(__read_only image2d_t input0, __read_only image2d_t input1,
return;
}
if (Z < input_channels.x) {
FLT4 result =
read_imagef
(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X)));
write_imagef
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
FLT4 result =
READ_IMAGE
(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X)));
WRITE_IMAGE
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
} else {
FLT4 result =
read_imagef
(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X)));
write_imagef
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
FLT4 result =
READ_IMAGE
(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X)));
WRITE_IMAGE
(output, (int2)((Y)*output_shape.w + Z, (X)), result);
}
}
...
...
@@ -27,14 +27,14 @@ __kernel void Concat3input(__read_only image2d_t input0, __read_only image2d_t i
return
;
}
if
(
Z
<
input_channels.x
)
{
FLT4
result0
=
read_imagef
(
input0,
smp_none,
(
int2
)((
Y
)
*input_channels.x
+
Z,
(
X
)))
;
write_imagef
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result0
)
;
FLT4
result0
=
READ_IMAGE
(
input0,
smp_none,
(
int2
)((
Y
)
*input_channels.x
+
Z,
(
X
)))
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result0
)
;
}
else
if
(
Z
<
(
input_channels.x
+
input_channels.y
))
{
FLT4
result1
=
read_imagef
(
input1,
smp_none,
(
int2
)((
Y
)
*input_channels.y
+
Z
-
input_channels.x,
(
X
)))
;
write_imagef
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result1
)
;
FLT4
result1
=
READ_IMAGE
(
input1,
smp_none,
(
int2
)((
Y
)
*input_channels.y
+
Z
-
input_channels.x,
(
X
)))
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result1
)
;
}
else
{
FLT4
result2
=
read_imagef
(
input2,
smp_none,
(
int2
)((
Y
)
*input_channels.z
+
Z
-
input_channels.x
-
input_channels.y,
(
X
)))
;
write_imagef
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result2
)
;
READ_IMAGE
(
input2,
smp_none,
(
int2
)((
Y
)
*input_channels.z
+
Z
-
input_channels.x
-
input_channels.y,
(
X
)))
;
WRITE_IMAGE
(
output,
(
int2
)((
Y
)
*output_shape.w
+
Z,
(
X
))
,
result2
)
;
}
}
This diff is collapsed.
Click to expand it.
mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl
浏览文件 @
b5ed5466
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
INT2
int2
#
define
INT4
int4
#
define
FLT4
float4
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
slice
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
INT4
input_shape,
INT4
out_shape,
INT4
begin,
INT2
sharedNoUpdiv
)
{
...
...
@@ -12,46 +12,43 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output,
FLT4
result
;
if
(
sharedNoUpdiv.x
%
4
==
0
)
{
for
(
int
i
=
0
; i < out_shape.w; i++) {
result
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
i
+
begin.w
)
,
(
X
+
begin.y
)))
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i,
(
X
))
,
result
)
;
result
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
i
+
begin.w
)
,
(
X
+
begin.y
)))
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i,
(
X
))
,
result
)
;
}
}
else
{
int
begin_postion
=
sharedNoUpdiv.y
%
4
;
FLT4
first
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
begin.w,
(
X
+
begin.y
)))
;
FLT4
first
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
begin.w,
(
X
+
begin.y
)))
;
if
(
begin_postion
==
1
)
{
for
(
int
i
=
1
; i <= out_shape.w; i++) {
FLT4
second
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
FLT4
second
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
result.x
=
first.y
;
result.y
=
first.z
;
result.z
=
first.w
;
result.w
=
second.x
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
first.y
=
second.y
;
first.z
=
second.z
;
first.w
=
second.w
;
}
}
else
if
(
begin_postion
==
2
)
{
for
(
int
i
=
1
; i <= out_shape.w; i++) {
FLT4
second
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
FLT4
second
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
result.x
=
first.z
;
result.y
=
first.w
;
result.z
=
second.x
;
result.w
=
second.y
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
first.z
=
second.z
;
first.w
=
second.w
;
}
}
else
{
for
(
int
i
=
1
; i <= out_shape.w; i++) {
FLT4
second
=
read_imagef
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
FLT4
second
=
READ_IMAGE
(
input,
smp_none,
(
INT2
)((
Y
+
begin.z
)
*
input_shape.w
+
(
begin.w
+
i
)
,
(
X
+
begin.y
)))
;
result.x
=
first.w
;
result.y
=
second.x
;
result.z
=
second.y
;
result.w
=
second.z
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
i
-
1
,
(
X
))
,
result
)
;
first.w
=
second.w
;
}
}
...
...
@@ -64,18 +61,18 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output,
result_fill0.y
=
0
;
result_fill0.z
=
0
;
result_fill0.w
=
0
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
}
else
if
(
size
==
2
)
{
result_fill0.x
=
result.x
;
result_fill0.y
=
result.y
;
result_fill0.z
=
0
;
result_fill0.w
=
0
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
}
else
if
(
size
==
3
)
{
result_fill0.x
=
result.x
;
result_fill0.y
=
result.y
;
result_fill0.z
=
result.z
;
result_fill0.w
=
0
;
write_imagef
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
WRITE_IMAGE
(
output,
(
INT2
)((
Y
)
*out_shape.w
+
out_shape.w
-
1
,
(
X
))
,
result_fill0
)
;
}
}
This diff is collapsed.
Click to expand it.
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
浏览文件 @
b5ed5466
...
...
@@ -38,11 +38,12 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_siz
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
...
...
@@ -148,4 +149,5 @@ kernel::LiteKernel *OpenCLBatchnormKernelCreator(const std::vector<lite::tensor:
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_BatchNorm
,
OpenCLBatchnormKernelCreator
);
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_BatchNorm
,
OpenCLBatchnormKernelCreator
);
}
// namespace mindspore::kernel
This diff is collapsed.
Click to expand it.
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
浏览文件 @
b5ed5466
...
...
@@ -38,11 +38,12 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
...
...
@@ -225,4 +226,5 @@ kernel::LiteKernel *OpenCLConcatKernelCreator(const std::vector<lite::tensor::Te
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Concat
,
OpenCLConcatKernelCreator
);
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_Concat
,
OpenCLConcatKernelCreator
);
}
// namespace mindspore::kernel
This diff is collapsed.
Click to expand it.
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
浏览文件 @
b5ed5466
...
...
@@ -202,4 +202,6 @@ kernel::LiteKernel *OpenCLMatMulKernelCreator(const std::vector<lite::tensor::Te
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_MatMul
,
OpenCLMatMulKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_FullConnection
,
OpenCLMatMulKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_MatMul
,
OpenCLMatMulKernelCreator
)
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_FullConnection
,
OpenCLMatMulKernelCreator
)
}
// namespace mindspore::kernel
This diff is collapsed.
Click to expand it.
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
浏览文件 @
b5ed5466
...
...
@@ -38,11 +38,12 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
im_dst_y
=
out_tensors_
[
0
]
->
Height
()
*
CO4
;
im_dst_x
=
out_tensors_
[
0
]
->
Width
();
}
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
std
::
vector
<
size_t
>
vec
{
im_dst_x
,
im_dst_y
,
img_dtype
};
*
img_size
=
vec
;
...
...
@@ -143,4 +144,6 @@ kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector<lite::tensor::Ten
}
REG_KERNEL
(
kGPU
,
kNumberTypeFloat32
,
PrimitiveType_Slice
,
OpenCLSliceKernelCreator
);
REG_KERNEL
(
kGPU
,
kNumberTypeFloat16
,
PrimitiveType_Slice
,
OpenCLSliceKernelCreator
);
}
// namespace mindspore::kernel
This diff is collapsed.
Click to expand it.
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
浏览文件 @
b5ed5466
...
...
@@ -23,9 +23,13 @@
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h"
namespace
mindspore
{
class
TestBatchnormOpenCL
:
public
mindspore
::
CommonTest
{
class
TestBatchnormOpenCL
fp32
:
public
mindspore
::
CommonTest
{
public:
TestBatchnormOpenCL
()
{}
TestBatchnormOpenCLfp32
()
{}
};
class
TestBatchnormOpenCLfp16
:
public
mindspore
::
CommonTest
{
public:
TestBatchnormOpenCLfp16
()
{}
};
template
<
typename
T
>
...
...
@@ -35,30 +39,153 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
ASSERT_LE
(
abs
,
err_bound
);
}
}
TEST_F
(
TestBatchnormOpenCLfp16
,
Batchnormfp16input_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
SetFp16Enable
(
true
);
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"Read tensors from .bin"
;
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
48
};
std
::
vector
<
int
>
output_shape
=
{
1
,
256
,
256
,
48
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/batchnorm_in_datafp16.bin"
;
std
::
string
mean_path
=
"./test_data/batchnorm_meanfp16.bin"
;
std
::
string
var_path
=
"./test_data/batchnorm_varfp16.bin"
;
std
::
string
offset_path
=
"./test_data/batchnorm_offsetfp16.bin"
;
std
::
string
scale_path
=
"./test_data/batchnorm_scalefp16.bin"
;
std
::
string
output_path
=
"./test_data/batchnorm_out_datafp16.bin"
;
auto
input_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
size_t
mean_size
,
var_size
,
scale_size
,
offset_size
;
auto
mean_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
mean_path
.
c_str
(),
&
mean_size
));
auto
var_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
var_path
.
c_str
(),
&
var_size
));
auto
scale_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
scale_path
.
c_str
(),
&
scale_size
));
auto
offset_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
offset_path
.
c_str
(),
&
offset_size
));
MS_LOG
(
INFO
)
<<
"construct tensors"
;
lite
::
tensor
::
Tensor
*
tensor_data
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_mean
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_var
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_scale
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
lite
::
tensor
::
Tensor
*
tensor_offset
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
{
1
,
1
,
1
,
input_shape
[
3
]},
schema
::
Format_NHWC
,
tensor_type
);
if
(
tensor_data
==
nullptr
||
tensor_mean
==
nullptr
||
tensor_var
==
nullptr
||
tensor_scale
==
nullptr
||
tensor_offset
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
return
;
}
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC4
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
delete
tensor_data
;
delete
tensor_mean
;
delete
tensor_var
;
delete
tensor_scale
;
delete
tensor_offset
;
return
;
}
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
=
{
tensor_data
,
tensor_scale
,
tensor_offset
,
tensor_mean
,
tensor_var
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
MS_LOG
(
INFO
)
<<
"initialize tensors"
;
auto
param
=
new
(
std
::
nothrow
)
BatchNormParameter
();
if
(
param
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new BatchNormParameter failed"
;
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
return
;
}
param
->
epsilon_
=
pow
(
10
,
-
5
);
auto
*
batchnorm_kernel
=
new
(
std
::
nothrow
)
kernel
::
BatchNormOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
batchnorm_kernel
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::BatchNorm_kernel failed"
;
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
return
;
}
batchnorm_kernel
->
Init
();
TEST_F
(
TestBatchnormOpenCL
,
Batchnorminput_dim4
)
{
// to do allocate memory for inputs and outputs
for
(
auto
&
input_tensor
:
inputs
)
{
input_tensor
->
MallocData
(
allocator
);
}
MS_LOG
(
INFO
)
<<
"initialize sub_graph"
;
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
batchnorm_kernel
};
auto
*
sub_graph
=
new
(
std
::
nothrow
)
kernel
::
SubGraphOpenCLKernel
(
inputs
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
sub_graph
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::SubGraphOpenCLKernel failed"
;
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
batchnorm_kernel
;
return
;
}
sub_graph
->
Init
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
memcpy
(
inputs
[
0
]
->
Data
(),
input_data
,
input_size
);
memcpy
(
inputs
[
1
]
->
Data
(),
scale_data
,
scale_size
);
memcpy
(
inputs
[
2
]
->
Data
(),
offset_data
,
offset_size
);
memcpy
(
inputs
[
3
]
->
Data
(),
mean_data
,
mean_size
);
memcpy
(
inputs
[
4
]
->
Data
(),
var_data
,
var_size
);
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float16_t
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
output_data_gpu
,
correct_data
,
output_tensor
->
ElementsNum
(),
0.0001
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
batchnorm_kernel
;
delete
sub_graph
;
}
TEST_F
(
TestBatchnormOpenCLfp32
,
Batchnormfp32input_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"Read tensors from .bin"
;
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
16
};
std
::
vector
<
int
>
output_shape
=
{
1
,
256
,
256
,
16
};
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
47
};
std
::
vector
<
int
>
output_shape
=
{
1
,
256
,
256
,
47
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/in_data.bin"
;
std
::
string
mean_path
=
"./test_data/mean.bin"
;
std
::
string
var_path
=
"./test_data/var.bin"
;
std
::
string
output_path
=
"./test_data/out_data.bin"
;
std
::
string
input_path
=
"./test_data/batchnorm_in_datafp32.bin"
;
std
::
string
mean_path
=
"./test_data/batchnorm_meanfp32.bin"
;
std
::
string
var_path
=
"./test_data/batchnorm_varfp32.bin"
;
std
::
string
offset_path
=
"./test_data/batchnorm_offsetfp32.bin"
;
std
::
string
scale_path
=
"./test_data/batchnorm_scalefp32.bin"
;
std
::
string
output_path
=
"./test_data/batchnorm_out_datafp32.bin"
;
auto
input_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
size_t
mean_size
,
var_size
;
size_t
mean_size
,
var_size
,
scale_size
,
offset_size
;
auto
mean_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
mean_path
.
c_str
(),
&
mean_size
));
auto
var_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
var_path
.
c_str
(),
&
var_size
));
auto
scale_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
scale_path
.
c_str
(),
&
scale_size
));
auto
offset_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
offset_path
.
c_str
(),
&
offset_size
));
MS_LOG
(
INFO
)
<<
"construct tensors"
;
lite
::
tensor
::
Tensor
*
tensor_data
=
...
...
@@ -131,14 +258,9 @@ TEST_F(TestBatchnormOpenCL, Batchnorminput_dim4) {
}
sub_graph
->
Init
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
std
::
cout
<<
"init tensors"
<<
std
::
endl
;
memcpy
(
inputs
[
0
]
->
Data
(),
input_data
,
input_size
);
auto
&
temp
=
inputs
[
1
];
auto
tensor_temp
=
reinterpret_cast
<
float
*>
(
temp
->
Data
());
int
UPDIV_tensor_scale
=
UP_DIV
(
tensor_scale
->
ElementsNum
(),
C4NUM
)
*
4
;
for
(
int
i
=
0
;
i
<
UPDIV_tensor_scale
;
++
i
)
{
tensor_temp
[
i
]
=
static_cast
<
float
>
(
1
);
}
memcpy
(
inputs
[
1
]
->
Data
(),
scale_data
,
scale_size
);
memcpy
(
inputs
[
2
]
->
Data
(),
offset_data
,
offset_size
);
memcpy
(
inputs
[
3
]
->
Data
(),
mean_data
,
mean_size
);
memcpy
(
inputs
[
4
]
->
Data
(),
var_data
,
var_size
);
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
...
...
This diff is collapsed.
Click to expand it.
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
浏览文件 @
b5ed5466
...
...
@@ -21,9 +21,10 @@
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h"
void
ConcatComputeByCPU_2input_dim4_axis3
(
const
float
*
input0
,
const
float
*
input1
,
float
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape1
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
template
<
typename
T
>
void
ConcatComputeByCPU_2input_dim4_axis3
(
const
T
*
input0
,
const
T
*
input1
,
T
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape1
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
int
postion
,
index0
=
0
,
index1
=
0
;
for
(
int
i
=
0
;
i
<
output_shape
[
0
];
i
++
)
{
for
(
int
j
=
0
;
j
<
output_shape
[
1
];
j
++
)
{
...
...
@@ -43,10 +44,10 @@ void ConcatComputeByCPU_2input_dim4_axis3(const float *input0, const float *inpu
}
}
}
void
ConcatComputeByCPU_3input_dim4_axis3
(
float
*
input0
,
float
*
input1
,
float
*
input2
,
float
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape1
,
std
::
vector
<
int
>
input_shape
2
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
template
<
typename
T
>
void
ConcatComputeByCPU_3input_dim4_axis3
(
T
*
input0
,
T
*
input1
,
T
*
input2
,
T
*
output
,
std
::
vector
<
int
>
input_shape0
,
std
::
vector
<
int
>
input_shape
1
,
std
::
vector
<
int
>
input_shape2
,
std
::
vector
<
int
>
output_shape
,
const
int
axis
)
{
int
postion
,
index0
=
0
,
index1
=
0
,
index2
=
0
;
for
(
int
i
=
0
;
i
<
output_shape
[
0
];
i
++
)
{
for
(
int
j
=
0
;
j
<
output_shape
[
1
];
j
++
)
{
...
...
@@ -82,9 +83,13 @@ void ConcatComputeByCPU_3input_dim4_axis3(float *input0, float *input1, float *i
}
namespace
mindspore
{
class
TestConcatOpenCL
:
public
mindspore
::
CommonTest
{
class
TestConcatOpenCLfp32
:
public
mindspore
::
CommonTest
{
public:
TestConcatOpenCLfp32
()
{}
};
class
TestConcatOpenCLfp16
:
public
mindspore
::
CommonTest
{
public:
TestConcatOpenCL
()
{}
TestConcatOpenCL
fp16
()
{}
};
template
<
typename
T
>
...
...
@@ -94,18 +99,138 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
ASSERT_LE
(
abs
,
err_bound
);
}
}
TEST_F
(
TestConcatOpenCLfp16
,
ConcatFp16_2input_dim4_axis3
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
SetFp16Enable
(
true
);
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
constexpr
int
INPUT_NUM
=
3
;
std
::
array
<
std
::
vector
<
int
>
,
INPUT_NUM
>
input_shapes
=
{
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
}};
std
::
vector
<
int
>
output_shape
=
{
1
,
16
,
256
,
240
};
auto
data_type
=
kNumberTypeFloat16
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
;
for
(
auto
&
shape
:
input_shapes
)
{
auto
input_temp
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
shape
,
schema
::
Format_NHWC4
,
tensor_type
);
inputs
.
push_back
(
input_temp
);
if
(
input_temp
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new input_tensor failed"
;
return
;
}
}
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC4
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new output_tensor failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
return
;
}
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
MS_LOG
(
INFO
)
<<
"input_shapes size=: "
<<
input_shapes
.
size
();
MS_LOG
(
INFO
)
<<
"initialize tensors"
;
auto
param
=
new
(
std
::
nothrow
)
ConcatParameter
();
if
(
param
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new ConcatParameter failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
return
;
}
param
->
axis_
=
3
;
auto
*
concat_kernel
=
new
(
std
::
nothrow
)
kernel
::
ConcatOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
concat_kernel
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::ConcatOpenCLKernel failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
return
;
}
concat_kernel
->
Init
();
// to do allocate memory for inputs and outputs
for
(
auto
&
input_tensor
:
inputs
)
{
input_tensor
->
MallocData
(
allocator
);
}
MS_LOG
(
INFO
)
<<
"initialize sub_graph"
;
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
concat_kernel
};
auto
*
sub_graph
=
new
(
std
::
nothrow
)
kernel
::
SubGraphOpenCLKernel
(
inputs
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
sub_graph
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"new kernel::SubGraphOpenCLKernel failed"
;
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
concat_kernel
;
return
;
}
sub_graph
->
Init
();
unsigned
int
seed
=
123
;
MS_LOG
(
INFO
)
<<
"initialize input data"
;
for
(
auto
&
input_tensor
:
inputs
)
{
auto
input_data
=
reinterpret_cast
<
float16_t
*>
(
input_tensor
->
Data
());
for
(
int
i
=
0
;
i
<
input_tensor
->
ElementsNum
();
++
i
)
{
input_data
[
i
]
=
static_cast
<
float16_t
>
(
rand_r
(
&
seed
)
%
10
+
1
);
}
}
// compute the result for CPU
auto
*
input_data0
=
reinterpret_cast
<
float16_t
*>
(
inputs
[
0
]
->
Data
());
auto
*
input_data1
=
reinterpret_cast
<
float16_t
*>
(
inputs
[
1
]
->
Data
());
std
::
vector
<
float16_t
>
output_data_cpu
(
output_shape
[
0
]
*
output_shape
[
1
]
*
output_shape
[
2
]
*
output_shape
[
3
]);
if
(
inputs
.
size
()
==
2
)
{
ConcatComputeByCPU_2input_dim4_axis3
(
input_data0
,
input_data1
,
output_data_cpu
.
data
(),
input_shapes
[
0
],
input_shapes
[
1
],
output_shape
,
param
->
axis_
);
}
if
(
inputs
.
size
()
==
3
)
{
auto
*
input_data2
=
reinterpret_cast
<
float16_t
*>
(
inputs
[
2
]
->
Data
());
ConcatComputeByCPU_3input_dim4_axis3
(
input_data0
,
input_data1
,
input_data2
,
output_data_cpu
.
data
(),
input_shapes
[
0
],
input_shapes
[
1
],
input_shapes
[
2
],
output_shape
,
param
->
axis_
);
}
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float16_t
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
output_data_gpu
,
output_data_cpu
.
data
(),
output_tensor
->
ElementsNum
(),
0.00001
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
concat_kernel
;
delete
sub_graph
;
}
TEST_F
(
TestConcatOpenCL
,
ConcatFp32_2input_dim4_axis3
)
{
TEST_F
(
TestConcatOpenCL
fp32
,
ConcatFp32_2input_dim4_axis3
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
constexpr
int
INPUT_NUM
=
2
;
std
::
array
<
std
::
vector
<
int
>
,
INPUT_NUM
>
input_shapes
=
{
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
}};
std
::
vector
<
int
>
output_shape
=
{
1
,
16
,
256
,
16
0
};
constexpr
int
INPUT_NUM
=
3
;
std
::
array
<
std
::
vector
<
int
>
,
INPUT_NUM
>
input_shapes
=
{
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
},
std
::
vector
<
int
>
{
1
,
16
,
256
,
80
}};
std
::
vector
<
int
>
output_shape
=
{
1
,
16
,
256
,
24
0
};
auto
data_type
=
kNumberTypeFloat32
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
;
...
...
This diff is collapsed.
Click to expand it.
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
浏览文件 @
b5ed5466
...
...
@@ -23,9 +23,13 @@
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h"
namespace
mindspore
{
class
TestSliceOpenCL
:
public
mindspore
::
CommonTest
{
class
TestSliceOpenCL
fp32
:
public
mindspore
::
CommonTest
{
public:
TestSliceOpenCL
()
{}
TestSliceOpenCLfp32
()
{}
};
class
TestSliceOpenCLfp16
:
public
mindspore
::
CommonTest
{
public:
TestSliceOpenCLfp16
()
{}
};
template
<
typename
T
>
...
...
@@ -36,7 +40,7 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou
}
}
TEST_F
(
TestSliceOpenCL
,
Slice
input_dim4
)
{
TEST_F
(
TestSliceOpenCL
fp32
,
Slicefp32
input_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
...
...
@@ -52,8 +56,8 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/in_data.bin"
;
std
::
string
output_path
=
"./test_data/out_data.bin"
;
std
::
string
input_path
=
"./test_data/in_data
fp32
.bin"
;
std
::
string
output_path
=
"./test_data/out_data
fp32
.bin"
;
auto
input_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
...
...
@@ -86,7 +90,7 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
MS_LOG
(
INFO
)
<<
"new SliceParameter failed"
;
return
;
}
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
for
(
int
i
=
0
;
i
<
input_shape
.
size
()
;
i
++
)
{
param
->
begin_
[
i
]
=
begin
[
i
];
param
->
size_
[
i
]
=
size
[
i
];
}
...
...
@@ -145,4 +149,114 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
delete
slice_kernel
;
delete
sub_graph
;
}
TEST_F
(
TestSliceOpenCLfp16
,
Slicefp16input_dim4
)
{
MS_LOG
(
INFO
)
<<
"begin test"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
SetFp16Enable
(
true
);
ocl_runtime
->
Init
();
auto
allocator
=
ocl_runtime
->
GetAllocator
();
MS_LOG
(
INFO
)
<<
"Read tensors from .bin"
;
std
::
vector
<
int
>
input_shape
=
{
1
,
256
,
256
,
48
};
std
::
vector
<
int
>
output_shape
=
{
1
,
255
,
255
,
15
};
std
::
vector
<
int
>
begin
=
{
0
,
1
,
1
,
7
};
std
::
vector
<
int
>
size
=
{
1
,
255
,
255
,
15
};
auto
data_type
=
kNumberTypeFloat16
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
// get the input from .bin
size_t
input_size
,
output_size
;
std
::
string
input_path
=
"./test_data/in_data.bin"
;
std
::
string
output_path
=
"./test_data/out_data.bin"
;
auto
input_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
input_path
.
c_str
(),
&
input_size
));
auto
correct_data
=
reinterpret_cast
<
float16_t
*>
(
mindspore
::
lite
::
ReadFile
(
output_path
.
c_str
(),
&
output_size
));
MS_LOG
(
INFO
)
<<
"construct tensors"
;
lite
::
tensor
::
Tensor
*
tensor_data
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
schema
::
Format_NHWC
,
tensor_type
);
if
(
tensor_data
==
nullptr
)
{
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
return
;
}
auto
*
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
schema
::
Format_NHWC4
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
delete
tensor_data
;
MS_LOG
(
INFO
)
<<
"init tensor failed"
;
return
;
}
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
=
{
tensor_data
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
=
{
output_tensor
};
MS_LOG
(
INFO
)
<<
"setting SliceParameter"
;
auto
param
=
new
(
std
::
nothrow
)
SliceParameter
();
if
(
param
==
nullptr
)
{
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
MS_LOG
(
INFO
)
<<
"new SliceParameter failed"
;
return
;
}
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
param
->
begin_
[
i
]
=
begin
[
i
];
param
->
size_
[
i
]
=
size
[
i
];
}
auto
*
slice_kernel
=
new
(
std
::
nothrow
)
kernel
::
SliceOpenCLKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
if
(
slice_kernel
==
nullptr
)
{
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
MS_LOG
(
INFO
)
<<
"new kernel::slice_kernel failed"
;
return
;
}
slice_kernel
->
Init
();
// to do allocate memory for inputs and outputs
for
(
auto
&
input_tensor
:
inputs
)
{
input_tensor
->
MallocData
(
allocator
);
}
MS_LOG
(
INFO
)
<<
"initialize sub_graph"
;
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
slice_kernel
};
auto
*
sub_graph
=
new
(
std
::
nothrow
)
kernel
::
SubGraphOpenCLKernel
(
inputs
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
sub_graph
==
nullptr
)
{
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
param
;
delete
slice_kernel
;
MS_LOG
(
INFO
)
<<
"new kernel::SubGraphOpenCLKernel failed"
;
return
;
}
sub_graph
->
Init
();
MS_LOG
(
INFO
)
<<
"init tensors"
;
memcpy
(
inputs
[
0
]
->
Data
(),
input_data
,
input_size
);
std
::
cout
<<
"==================output data================"
<<
std
::
endl
;
sub_graph
->
Run
();
auto
*
output_data_gpu
=
reinterpret_cast
<
float16_t
*>
(
output_tensor
->
Data
());
CompareOutputData1
(
output_data_gpu
,
correct_data
,
output_tensor
->
ElementsNum
(),
0.0001
);
for
(
auto
tensor
:
inputs
)
{
delete
tensor
;
}
for
(
auto
tensor
:
outputs
)
{
delete
tensor
;
}
delete
slice_kernel
;
delete
sub_graph
;
}
}
// namespace mindspore
This diff is collapsed.
Click to expand it.
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录
新手
引导
客服
返回
顶部