magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit 39e27911
Authored by mindspore-ci-bot on Aug 31, 2020; committed via Gitee on Aug 31, 2020.
!5588 activation support f16 in opencl
Merge pull request !5588 from liuzhongkai/activation1_fp16
Parents: 65819fb5, c29e9596
Showing 9 changed files with 169 additions and 93 deletions (+169, -93):
- mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl (+19, -19)
- mindspore/lite/src/runtime/kernel/opencl/cl/depthwise_conv2d.cl (+2, -2)
- mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl (+8, -8)
- mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc (+23, -8)
- mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h (+4, -0)
- mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc (+1, -1)
- mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc (+7, -3)
- mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc (+1, -1)
- mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc (+104, -51)
mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl:
 #pragma OPENCL EXTENSION cl_arm_printf : enable
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 #define SLICES 4
 #define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
 #define MIN(X, Y) (X < Y ? X : Y)
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
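Note: UP_DIV(C, SLICES) below is the number of 4-channel slices a C-channel tensor occupies in the NHWC4 layout. A worked example:

// Illustrative: a 9-channel tensor packs into UP_DIV(9, 4) == (9 + 4 - 1) / 4 == 3
// slices of 4 channels each; the last slice is zero-padded.
int slices = UP_DIV(9, 4);  // == 3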
-__kernel void ReluScalar(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape,
-                         const float alpha) {
+__kernel void LeakyRelu(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape,
+                        __global FLT *alpha) {
   int C = input_shape.w;     // channel size
   int Y = get_global_id(0);  // height id
   int X = get_global_id(1);  // width id
   for (int num = 0; num < UP_DIV(C, SLICES); ++num) {
     FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X * UP_DIV(C, SLICES) + num, Y));  // NHWC4: H WC
     FLT4 tmp;
-    tmp.x = in_c4.x >= 0 ? in_c4.x : in_c4.x * alpha;
-    tmp.y = in_c4.y >= 0 ? in_c4.y : in_c4.y * alpha;
-    tmp.z = in_c4.z >= 0 ? in_c4.z : in_c4.z * alpha;
-    tmp.w = in_c4.w >= 0 ? in_c4.w : in_c4.w * alpha;
+    tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * alpha[0];
+    tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * alpha[0];
+    tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * alpha[0];
+    tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * alpha[0];
     WRITE_IMAGE(output, (int2)(X * UP_DIV(C, SLICES) + num, Y), tmp);  // NHWC4: H WC
   }
 }
@@ -28,10 +28,10 @@ __kernel void Relu(__read_only image2d_t input, __write_only image2d_t output, c
   for (int num = 0; num < UP_DIV(C, SLICES); ++num) {
     FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X * UP_DIV(C, SLICES) + num, Y));  // NHWC4: H WC
     FLT4 tmp;
-    tmp.x = in_c4.x >= 0 ? in_c4.x : 0;
-    tmp.y = in_c4.y >= 0 ? in_c4.y : 0;
-    tmp.z = in_c4.z >= 0 ? in_c4.z : 0;
-    tmp.w = in_c4.w >= 0 ? in_c4.w : 0;
+    tmp.x = in_c4.x > 0.0f ? in_c4.x : 0.0f;
+    tmp.y = in_c4.y > 0.0f ? in_c4.y : 0.0f;
+    tmp.z = in_c4.z > 0.0f ? in_c4.z : 0.0f;
+    tmp.w = in_c4.w > 0.0f ? in_c4.w : 0.0f;
     WRITE_IMAGE(output, (int2)(X * UP_DIV(C, SLICES) + num, Y), tmp);  // NHWC4: H WC
   }
 }
@@ -43,10 +43,10 @@ __kernel void Relu6(__read_only image2d_t input, __write_only image2d_t output,
   for (int num = 0; num < UP_DIV(C, SLICES); ++num) {
     FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X * UP_DIV(C, SLICES) + num, Y));  // NHWC4: H WC
     FLT4 tmp;
-    tmp.x = in_c4.x >= 0 ? MIN(in_c4.x, 6) : 0;
-    tmp.y = in_c4.y >= 0 ? MIN(in_c4.y, 6) : 0;
-    tmp.z = in_c4.z >= 0 ? MIN(in_c4.z, 6) : 0;
-    tmp.w = in_c4.w >= 0 ? MIN(in_c4.w, 6) : 0;
+    tmp.x = in_c4.x > 0.0f ? MIN(in_c4.x, 6.0f) : 0.0f;
+    tmp.y = in_c4.y > 0.0f ? MIN(in_c4.y, 6.0f) : 0.0f;
+    tmp.z = in_c4.z > 0.0f ? MIN(in_c4.z, 6.0f) : 0.0f;
+    tmp.w = in_c4.w > 0.0f ? MIN(in_c4.w, 6.0f) : 0.0f;
     WRITE_IMAGE(output, (int2)(X * UP_DIV(C, SLICES) + num, Y), tmp);  // NHWC4: H WC
   }
 }
@@ -58,10 +58,10 @@ __kernel void Sigmoid(__read_only image2d_t input, __write_only image2d_t output
   for (int num = 0; num < UP_DIV(C, SLICES); ++num) {
     FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X * UP_DIV(C, SLICES) + num, Y));  // NHWC4: H WC
     FLT4 tmp;
-    tmp.x = 1 / (1 + exp(-in_c4.x));
-    tmp.y = 1 / (1 + exp(-in_c4.y));
-    tmp.z = 1 / (1 + exp(-in_c4.z));
-    tmp.w = 1 / (1 + exp(-in_c4.w));
+    tmp.x = 1.0f / (1.0f + exp(-in_c4.x));
+    tmp.y = 1.0f / (1.0f + exp(-in_c4.y));
+    tmp.z = 1.0f / (1.0f + exp(-in_c4.z));
+    tmp.w = 1.0f / (1.0f + exp(-in_c4.w));
     WRITE_IMAGE(output, (int2)(X * UP_DIV(C, SLICES) + num, Y), tmp);  // NHWC4: H WC
   }
 }
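Note: the kernels are written against the FLT/FLT4 and READ_IMAGE/WRITE_IMAGE macros rather than concrete types, which is what lets one .cl file serve both precisions; rewriting the literals as 0.0f/1.0f/6.0f keeps the arithmetic in floating point whichever type FLT expands to. A minimal sketch of the kind of type-selection block this implies (the exact definitions MindSpore Lite injects through its build options are an assumption here):

// Hypothetical dtype-selection block; the runtime passes an equivalent set of
// -D definitions when it compiles the .cl source for fp16 or fp32.
#ifdef ENABLE_FP16
#define FLT half
#define FLT4 half4
#define TO_FLT4 convert_half4
#define READ_IMAGE read_imageh    // half4 from a CL_HALF_FLOAT image
#define WRITE_IMAGE write_imageh
#else
#define FLT float
#define FLT4 float4
#define TO_FLT4 convert_float4
#define READ_IMAGE read_imagef    // float4 from a CL_FLOAT image
#define WRITE_IMAGE write_imagef
#endif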
mindspore/lite/src/runtime/kernel/opencl/cl/depthwise_conv2d.cl:
@@ -84,7 +84,7 @@ __kernel void DepthwiseConv2d_IMG_NHWC4_1x1(__read_only image2d_t src_data, __gl
       bool outside_x = x_c < 0 || x_c >= src_size.x;
       if (!outside_x && !outside_y) {
         FLT4 flt_p = filter[fx_c];
-        FLT4 src_p = READ_IMAGE(src_data, smp_zero, (int2)(Z, (y_c * src_size.x + x_c) * src_size.z));
+        FLT4 src_p = READ_IMAGE(src_data, smp_zero, (int2)(Z + x_c * src_size.z, y_c));
         r += TO_FLT4(src_p * flt_p);
       }
     }
@@ -92,7 +92,7 @@ __kernel void DepthwiseConv2d_IMG_NHWC4_1x1(__read_only image2d_t src_data, __gl
   FLT4 bias_p = bias[Z];
   FLT4 res = TO_FLT4(r) + bias_p;
   res = clamp(res, (FLT)(relu_clip_min), (FLT)(relu_clip_max));
-  WRITE_IMAGE(dst_data, (int2)(Z, (Y * dst_size.x + X) * dst_size.z), res);
+  WRITE_IMAGE(dst_data, (int2)(X * dst_size.z + Z, Y), res);
 }
 __kernel void DepthwiseConv2d_BUF_NC4HW4(__global FLT4 *src_data, __global FLT4 *filter, __global FLT4 *bias,
                                          __global FLT4 *dst_data, int2 kernel_size, int2 stride,
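Note: both rewritten calls switch this 1x1 depthwise kernel from a flattened (slice, pixel-index) addressing scheme to the same NHWC4 image layout the activation kernels use: the x coordinate packs width and channel slices together, and y is the plain row. Illustrative index math, with C4 standing for the channel-slice count (src_size.z / dst_size.z here):

/* Illustrative only: image coordinate of pixel (X, Y), channel slice Z in an
   NHWC4-packed image with C4 channel slices per pixel. */
int image_x(int X, int Z, int C4) { return X * C4 + Z; }
int image_y(int Y) { return Y; }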
mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl:
@@ -14,16 +14,16 @@ __kernel void PRelu(__read_only image2d_t input, __write_only image2d_t output,
     FLT4 tmp;
     if (dim == 1) {
       FLT4 weight = READ_IMAGE(alpha, smp_zero, (int2)(0, 0));
-      tmp.x = in_c4.x >= 0 ? in_c4.x : in_c4.x * weight.x;
-      tmp.y = in_c4.y >= 0 ? in_c4.y : in_c4.y * weight.x;
-      tmp.z = in_c4.z >= 0 ? in_c4.z : in_c4.z * weight.x;
-      tmp.w = in_c4.w >= 0 ? in_c4.w : in_c4.w * weight.x;
+      tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * weight.x;
+      tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * weight.x;
+      tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * weight.x;
+      tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * weight.x;
     } else {
       FLT4 weight = READ_IMAGE(alpha, smp_zero, (int2)(num, 0));
-      tmp.x = in_c4.x >= 0 ? in_c4.x : in_c4.x * weight.x;
-      tmp.y = in_c4.y >= 0 ? in_c4.y : in_c4.y * weight.y;
-      tmp.z = in_c4.z >= 0 ? in_c4.z : in_c4.z * weight.z;
-      tmp.w = in_c4.w >= 0 ? in_c4.w : in_c4.w * weight.w;
+      tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * weight.x;
+      tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * weight.y;
+      tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * weight.z;
+      tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * weight.w;
     }
     WRITE_IMAGE(output, (int2)(X * UP_DIV(C, SLICES) + num, Y), tmp);  // NHWC4: H WC
   }
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc:
@@ -38,15 +38,33 @@ using mindspore::schema::PrimitiveType_Activation;

 namespace mindspore::kernel {

+void ActivationOpenClKernel::InitBuffer() {
+  auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
+  alpha_buff_ = allocator->Malloc(fp_size);
+  alpha_buff_ = allocator->MapBuffer(alpha_buff_, CL_MAP_WRITE, nullptr, true);
+  memset(alpha_buff_, 0x00, fp_size);
+  if (enable_fp16_) {
+    auto fp16 = (float16_t)alpha_;
+    memcpy(alpha_buff_, &fp16, fp_size);
+  } else {
+    memcpy(alpha_buff_, &alpha_, fp_size);
+  }
+  allocator->UnmapBuffer(alpha_buff_);
+}
+
 int ActivationOpenClKernel::Init() {
   in_size_ = in_tensors_[0]->shape().size();
   out_size_ = out_tensors_[0]->shape().size();
+  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
+  enable_fp16_ = ocl_runtime->GetFp16Enable();
+  fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
   if (in_size_ != 2 && in_size_ != 4) {
     MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_;
     return RET_ERROR;
   }
+  InitBuffer();
   std::map<int, std::vector<std::string>> Program_Kernel{
-      {ActivationType_LEAKY_RELU, std::vector<std::string>{"LEAKY_RELU", "ReluScalar"}},
+      {ActivationType_LEAKY_RELU, std::vector<std::string>{"LEAKY_RELU", "LeakyRelu"}},
       {ActivationType_RELU, std::vector<std::string>{"RELU", "Relu"}},
       {ActivationType_SIGMOID, std::vector<std::string>{"SIGMOID", "Sigmoid"}},
       {ActivationType_RELU6, std::vector<std::string>{"RELU6", "Relu6"}}};
@@ -57,7 +75,6 @@ int ActivationOpenClKernel::Init() {
   std::string source = activation_source;
   std::set<std::string> build_options;
-  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->LoadSource(Program_Kernel[type_][0], source);
   ocl_runtime->BuildKernel(kernel_, Program_Kernel[type_][0], Program_Kernel[type_][1], build_options);
@@ -87,7 +104,7 @@ int ActivationOpenClKernel::Run() {
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape);
   if (type_ == ActivationType_LEAKY_RELU) {
-    ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_);
+    ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_buff_, lite::opencl::MemType::BUF);
   }
   std::vector<size_t> local = {1, 1};
   std::vector<size_t> global = {static_cast<size_t>(img2d_shape.s[1]), static_cast<size_t>(img2d_shape.s[2])};
@@ -114,12 +131,10 @@ cl_int4 ActivationOpenClKernel::GetImg2dShape() {

 int ActivationOpenClKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
   cl_int4 img_shape = GetImg2dShape();
-#ifdef ENABLE_FP16
-  size_t img_dtype = CL_HALF_FLOAT;
-#else
   size_t img_dtype = CL_FLOAT;
-#endif
+  if (enable_fp16_) {
+    img_dtype = CL_HALF_FLOAT;
+  }
   img_size->clear();
   img_size->push_back(img_shape.s[2] * UP_DIV(img_shape.s[3], C4NUM));
   img_size->push_back(img_shape.s[1]);
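Note: these changes move the precision decision from compile time (#ifdef ENABLE_FP16) to runtime (GetFp16Enable()), and the new InitBuffer() guarantees that the scalar the host writes into alpha_buff_ has the same width as the kernel's __global FLT *alpha. A condensed sketch of that packing step, assuming float16_t is the ARM half-precision type this file already relies on:

#include <cstring>

// Sketch of the packing InitBuffer() performs: one scalar, two possible widths.
void PackAlpha(void *alpha_buff, float alpha, bool enable_fp16) {
  if (enable_fp16) {
    float16_t fp16 = static_cast<float16_t>(alpha);  // narrow to a 2-byte half
    std::memcpy(alpha_buff, &fp16, sizeof(fp16));
  } else {
    std::memcpy(alpha_buff, &alpha, sizeof(alpha));  // keep the 4-byte float
  }
}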
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h:
@@ -39,13 +39,17 @@ class ActivationOpenClKernel : public OpenCLKernel {
   int Run() override;
   int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
   cl_int4 GetImg2dShape();
+  void InitBuffer();

  private:
   cl::Kernel kernel_;
   int type_;
   float alpha_;
+  void *alpha_buff_;
   int in_size_;
   int out_size_;
+  size_t fp_size;
+  bool enable_fp16_{false};
 };
 }  // namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc:
@@ -56,7 +56,7 @@ int BiasAddOpenCLKernel::Init() {
   out_size_ = out_tensors_[0]->shape().size();
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   enable_fp16_ = ocl_runtime->GetFp16Enable();
-  fp_size = enable_fp16_ ? sizeof(float) / 2 : sizeof(float);
+  fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
   if (in_size_ != 4 && in_size_ != 2) {
     MS_LOG(ERROR) << "BiasAdd only support dim=4 or 2, but your dim=" << in_size_;
     return RET_ERROR;
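Note: the old and new expressions allocate the same two bytes; sizeof(uint16_t) simply states the intent (a 16-bit half) instead of deriving it from sizeof(float):

#include <cstdint>
static_assert(sizeof(float) / 2 == sizeof(uint16_t),
              "both spellings of the fp16 scalar width evaluate to 2 bytes");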
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc:
@@ -44,13 +44,17 @@ int DepthwiseConv2dOpenCLKernel::Init() {
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   std::string kernel_name = "DepthwiseConv2d";
   auto in_format = in_tensors_[0]->GetFormat();
-  in_ori_format_ = in_format;
+  in_ori_format_ = in_tensors_[0]->GetFormat();
   out_ori_format_ = out_tensors_[0]->GetFormat();
   out_tensors_[0]->SetFormat(in_format);
+  in_format = (in_format == schema::Format_NHWC)
+                ? schema::Format_NHWC4
+                : ((in_format == schema::Format_NCHW) ? schema::Format_NC4HW4 : in_format);
   if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
     MS_LOG(ERROR) << "input format(" << in_format << ") " << "format not support!";
   }
+  in_tensors_[0]->SetFormat(in_format);
+  out_tensors_[0]->SetFormat(in_format);
   if (out_mem_type_ == OpenCLMemType::BUF) {
     kernel_name += "_BUF";
   } else {
@@ -182,7 +186,7 @@ int DepthwiseConv2dOpenCLKernel::Run() {
   GetLocalSize(0, global, &local);

   std::map<ActType, std::pair<float, float>> relu_clips{
-      {ActType_No, {FLT_MIN, FLT_MAX}}, {ActType_Relu, {0.0, FLT_MAX}}, {ActType_Relu6, {0, 6.0}}};
+      {ActType_No, {-FLT_MAX, FLT_MAX}}, {ActType_Relu, {0.0, FLT_MAX}}, {ActType_Relu6, {0, 6.0}}};
   cl_int2 kernel_size = {parameter->kernel_h_, parameter->kernel_w_};
   cl_int2 stride = {parameter->stride_h_, parameter->stride_w_};
   cl_int2 padding = {-parameter->pad_u_, -parameter->pad_l_};
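Note: the relu_clips fix matters because FLT_MIN is the smallest positive normalized float (about 1.18e-38), not the most negative representable value, so clamping the no-activation case to [FLT_MIN, FLT_MAX] silently discarded every negative output; -FLT_MAX restores the full range. A small standalone check:

#include <algorithm>
#include <cfloat>
#include <cstdio>

int main() {
  float r = -3.5f;  // a legitimate negative conv result
  float old_clip = std::min(std::max(r, FLT_MIN), FLT_MAX);   // 1.17549e-38: sign lost
  float new_clip = std::min(std::max(r, -FLT_MAX), FLT_MAX);  // -3.5: passes through
  std::printf("old=%g new=%g\n", old_clip, new_clip);
  return 0;
}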
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc:
@@ -68,7 +68,7 @@ int PReluOpenCLKernel::Init() {
   std::string kernel_name = "PRelu";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   enable_fp16_ = ocl_runtime->GetFp16Enable();
-  fp_size = enable_fp16_ ? sizeof(float) / 2 : sizeof(float);
+  fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
   InitBuffer();
   ocl_runtime->LoadSource(program_name, source);
   ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc:
@@ -46,16 +46,17 @@ void LoadActivationData(void *dst, size_t dst_size, const std::string &file_path
   }
 }

+template <typename T>
 void CompareRes(lite::tensor::Tensor *output_tensor, const std::string &standard_answer_file) {
-  auto *output_data = reinterpret_cast<float *>(output_tensor->Data());
+  auto *output_data = reinterpret_cast<T *>(output_tensor->Data());
   size_t output_size = output_tensor->Size();
-  auto expect_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size));
+  auto expect_data = reinterpret_cast<T *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size));
   constexpr float atol = 0.0002;
   for (int i = 0; i < output_tensor->ElementsNum(); ++i) {
     if (std::fabs(output_data[i] - expect_data[i]) > atol) {
-      printf("error at idx[%d] expect=%.3f output=%.3f\n", i, expect_data[i], output_data[i]);
-      printf("error at idx[%d] expect=%.3f output=%.3f\n", i, expect_data[i], output_data[i]);
-      printf("error at idx[%d] expect=%.3f output=%.3f\n\n\n", i, expect_data[i], output_data[i]);
+      printf("error at idx[%d] expect=%f output=%f\n", i, expect_data[i], output_data[i]);
+      printf("error at idx[%d] expect=%f output=%f\n", i, expect_data[i], output_data[i]);
+      printf("error at idx[%d] expect=%f output=%f\n\n\n", i, expect_data[i], output_data[i]);
       return;
     }
   }
@@ -64,8 +65,10 @@ void CompareRes(lite::tensor::Tensor *output_tensor, const std::string &standard
   printf("compare success!\n\n\n");
 }

-void printf_tensor(mindspore::lite::tensor::Tensor *in_data) {
-  auto input_data = reinterpret_cast<float *>(in_data->Data());
+template <typename T>
+void printf_tensor(const std::string &str, mindspore::lite::tensor::Tensor *in_data) {
+  MS_LOG(INFO) << str;
+  auto input_data = reinterpret_cast<T *>(in_data->Data());
   for (int i = 0; i < in_data->ElementsNum(); ++i) {
     printf("%f ", input_data[i]);
   }
@@ -73,24 +76,29 @@ void printf_tensor(mindspore::lite::tensor::Tensor *in_data) {
   MS_LOG(INFO) << "Print tensor done";
 }

-TEST_F(TestActivationOpenCL, ReluFp32_dim4) {
+TEST_F(TestActivationOpenCL, ReluFp_dim4) {
   std::string in_file = "/data/local/tmp/in_data.bin";
   std::string out_file = "/data/local/tmp/relu.bin";
   MS_LOG(INFO) << "Relu Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
   auto allocator = ocl_runtime->GetAllocator();
+  auto data_type = kNumberTypeFloat16;
+  ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
+  bool enable_fp16 = ocl_runtime->GetFp16Enable();

   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
-  auto data_type = kNumberTypeFloat32;
+  schema::Format format = schema::Format_NHWC;
+  if (input_shape.size() == 2) {
+    format = schema::Format_NC;
+  }
   auto tensor_type = schema::NodeType_ValueNode;
-  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
     MS_LOG(ERROR) << "new input tensor error!";
     return;
   }
-  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(ERROR) << "new output tensor error!";
     delete input_tensor;
@@ -99,10 +107,12 @@ TEST_F(TestActivationOpenCL, ReluFp32_dim4) {
   std::vector<lite::tensor::Tensor *> inputs{input_tensor};
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
   inputs[0]->MallocData(allocator);
   MS_LOG(INFO) << "Initialize input data";
   LoadActivationData(inputs[0]->Data(), inputs[0]->Size(), in_file);
-  MS_LOG(INFO) << "==================input data================";
-  printf_tensor(inputs[0]);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("ReluFp16:--input data---", inputs[0]);
+  } else {
+    printf_tensor<float>("ReluFp32:--input data---", inputs[0]);
+  }
   auto *param = new (std::nothrow) ActivationParameter();
   if (param == nullptr) {
@@ -164,35 +174,44 @@ TEST_F(TestActivationOpenCL, ReluFp32_dim4) {
     MS_LOG(ERROR) << "Run SubGraphOpenCLKernel error.";
     return;
   }

-  MS_LOG(INFO) << "==================output data================";
-  printf_tensor(outputs[0]);
-  CompareRes(output_tensor, out_file);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("ReluFp16--output data---", outputs[0]);
+    CompareRes<float16_t>(output_tensor, out_file);
+  } else {
+    printf_tensor<float>("ReluFp32--output data--", outputs[0]);
+    CompareRes<float>(output_tensor, out_file);
+  }
   delete kernel;
   delete param;
   delete input_tensor;
   delete output_tensor;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }

-TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
+TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
   std::string in_file = "/data/local/tmp/in_data.bin";
   std::string out_file = "/data/local/tmp/relu6.bin";
   MS_LOG(INFO) << "Relu6 Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
+  auto data_type = kNumberTypeFloat32;
+  ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
+  bool enable_fp16 = ocl_runtime->GetFp16Enable();
   ocl_runtime->Init();
-  auto allocator = ocl_runtime->GetAllocator();

   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
-  auto data_type = kNumberTypeFloat32;
+  schema::Format format = schema::Format_NHWC;
+  if (input_shape.size() == 2) {
+    format = schema::Format_NC;
+  }
   auto tensor_type = schema::NodeType_ValueNode;
-  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
     MS_LOG(ERROR) << "new input tensor error!";
     return;
   }
-  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(ERROR) << "new output tensor error!";
     delete input_tensor;
@@ -200,11 +219,15 @@ TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
   }
   std::vector<lite::tensor::Tensor *> inputs{input_tensor};
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
+  auto allocator = ocl_runtime->GetAllocator();
   inputs[0]->MallocData(allocator);
   MS_LOG(INFO) << "Initialize input data";
   LoadActivationData(inputs[0]->Data(), inputs[0]->Size(), in_file);
-  MS_LOG(INFO) << "==================input data================";
-  printf_tensor(inputs[0]);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("Relu6:FP16--input data--", inputs[0]);
+  } else {
+    printf_tensor<float>("Relu6:FP32--input data--", inputs[0]);
+  }
   auto *param = new (std::nothrow) ActivationParameter();
   if (param == nullptr) {
@@ -267,34 +290,44 @@ TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
     return;
   }

-  MS_LOG(INFO) << "==================output data================";
-  printf_tensor(outputs[0]);
-  CompareRes(output_tensor, out_file);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("Relu6:FP16--output data---", outputs[0]);
+    CompareRes<float16_t>(output_tensor, out_file);
+  } else {
+    printf_tensor<float>("Relu6:FP32--output data---", outputs[0]);
+    CompareRes<float>(output_tensor, out_file);
+  }
   delete kernel;
   delete param;
   delete input_tensor;
   delete output_tensor;
   delete sub_graph;
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }

-TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
+TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
   std::string in_file = "/data/local/tmp/in_data.bin";
   std::string out_file = "/data/local/tmp/sigmoid.bin";
   MS_LOG(INFO) << "Sigmoid Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
-  auto allocator = ocl_runtime->GetAllocator();
+  auto data_type = kNumberTypeFloat16;
+  ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
+  bool enable_fp16 = ocl_runtime->GetFp16Enable();

   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
-  auto data_type = kNumberTypeFloat32;
+  schema::Format format = schema::Format_NHWC;
+  if (input_shape.size() == 2) {
+    format = schema::Format_NC;
+  }
   auto tensor_type = schema::NodeType_ValueNode;
-  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
     MS_LOG(ERROR) << "new input tensor error!";
     return;
   }
-  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(ERROR) << "new output tensor error!";
     delete input_tensor;
@@ -302,11 +335,15 @@ TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
   }
   std::vector<lite::tensor::Tensor *> inputs{input_tensor};
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
+  auto allocator = ocl_runtime->GetAllocator();
   inputs[0]->MallocData(allocator);
   MS_LOG(INFO) << "Initialize input data";
   LoadActivationData(inputs[0]->Data(), inputs[0]->Size(), in_file);
-  MS_LOG(INFO) << "==================input data================";
-  printf_tensor(inputs[0]);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("Sigmoid:FP16--input data--", inputs[0]);
+  } else {
+    printf_tensor<float>("Sigmoid:FP32--input data--", inputs[0]);
+  }
   auto *param = new (std::nothrow) ActivationParameter();
   if (param == nullptr) {
@@ -369,9 +406,13 @@ TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
     return;
   }

-  MS_LOG(INFO) << "==================output data================";
-  printf_tensor(outputs[0]);
-  CompareRes(output_tensor, out_file);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("Sigmoid:FP16--output data---", outputs[0]);
+    CompareRes<float16_t>(output_tensor, out_file);
+  } else {
+    printf_tensor<float>("Sigmoid:FP32--output data---", outputs[0]);
+    CompareRes<float>(output_tensor, out_file);
+  }
   delete kernel;
   delete param;
   delete input_tensor;
@@ -380,24 +421,29 @@ TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
   lite::opencl::OpenCLRuntime::DeleteInstance();
 }

-TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
+TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
   std::string in_file = "/data/local/tmp/in_data.bin";
   std::string out_file = "/data/local/tmp/leaky_relu.bin";
   MS_LOG(INFO) << "Leaky relu Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
-  auto allocator = ocl_runtime->GetAllocator();
+  auto data_type = kNumberTypeFloat32;
+  ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
+  bool enable_fp16 = ocl_runtime->GetFp16Enable();

   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
-  auto data_type = kNumberTypeFloat32;
+  schema::Format format = schema::Format_NHWC;
+  if (input_shape.size() == 2) {
+    format = schema::Format_NC;
+  }
   auto tensor_type = schema::NodeType_ValueNode;
-  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
     MS_LOG(ERROR) << "new input tensor error!";
     return;
   }
-  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NC, tensor_type);
+  auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (output_tensor == nullptr) {
     MS_LOG(ERROR) << "new output tensor error!";
     delete input_tensor;
@@ -405,11 +451,15 @@ TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
   }
   std::vector<lite::tensor::Tensor *> inputs{input_tensor};
   std::vector<lite::tensor::Tensor *> outputs{output_tensor};
+  auto allocator = ocl_runtime->GetAllocator();
   inputs[0]->MallocData(allocator);
   MS_LOG(INFO) << "Initialize input data";
   LoadActivationData(inputs[0]->Data(), inputs[0]->Size(), in_file);
-  MS_LOG(INFO) << "==================input data================";
-  printf_tensor(inputs[0]);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("Leaky Relu:FP16--input data--", inputs[0]);
+  } else {
+    printf_tensor<float>("Leaky Relu:FP32--input data--", inputs[0]);
+  }
   auto *param = new (std::nothrow) ActivationParameter();
   if (param == nullptr) {
@@ -418,7 +468,7 @@ TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
     delete output_tensor;
     return;
   }
-  param->alpha_ = 0.3;
+  param->alpha_ = 0.3f;
   param->type_ = ActivationType_LEAKY_RELU;
   auto *kernel =
       new (std::nothrow) kernel::ActivationOpenClKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
@@ -472,10 +522,13 @@ TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
     MS_LOG(ERROR) << "Run SubGraphOpenCLKernel error.";
     return;
   }

-  MS_LOG(INFO) << "==================output data================";
-  printf_tensor(outputs[0]);
-  CompareRes(output_tensor, out_file);
+  if (enable_fp16) {
+    printf_tensor<float16_t>("Leaky Relu:FP16--output data---", outputs[0]);
+    CompareRes<float16_t>(output_tensor, out_file);
+  } else {
+    printf_tensor<float>("Leaky Relu:FP32--output data---", outputs[0]);
+    CompareRes<float>(output_tensor, out_file);
+  }
   delete kernel;
   delete param;
   delete input_tensor;