Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
mindspore
提交
873e0342
M
mindspore
项目概览
MindSpore
/
mindspore
通知
35
Star
15
Fork
15
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
873e0342
编写于
9月 03, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
9月 03, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5676 activation support NC4HW4 in opencl
Merge pull request !5676 from liuzhongkai/NC4HW4
上级
8896b88f
c659f352
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
140 addition
and
132 deletion
+140
-132
mindspore/lite/src/runtime/kernel/opencl/cl/biasadd.cl
mindspore/lite/src/runtime/kernel/opencl/cl/biasadd.cl
+19
-13
mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl
mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl
+30
-19
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
...spore/lite/src/runtime/kernel/opencl/kernel/activation.cc
+7
-13
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc
+27
-31
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h
+2
-1
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
+25
-23
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
+2
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
...ite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
+17
-21
mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc
...e/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc
+6
-8
mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc
...ore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc
+5
-3
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/biasadd.cl
浏览文件 @
873e0342
...
...
@@ -4,20 +4,26 @@
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__kernel
void
BiasAdd
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
const
int4
input_shape,
__read_only
image2d_t
alpha,
const
int
dim
)
{
int
C
=
input_shape.w
; // channel size
__read_only
image2d_t
alpha,
const
int
data_type
)
{
int
H
=
input_shape.y
;
int
C
=
input_shape.w
; // channel size
C
=
UP_DIV
(
C,
C4NUM
)
;
if
((
C
==
0
||
H
==
0
)
&&
data_type
!=
1
)
{
return
;
}
int
Y
=
get_global_id
(
0
)
; // height id
int
X
=
get_global_id
(
1
)
; // weight id
for
(
int
num
=
0
; num < UP_DIV(C, C4NUM); ++num) {
FLT4
in_c4
=
READ_IMAGE
(
input,
smp_zero,
(
int2
)(
X
*
UP_DIV
(
C,
C4NUM
)
+
num,
Y
))
; // NHWC4: H WC
FLT4
tmp
=
in_c4
;
int
index
=
0
;
if
(
dim
==
2
)
{
index
=
X
;
}
else
{
index
=
num
;
}
tmp
+=
READ_IMAGE
(
alpha,
smp_zero,
(
int2
)(
index,
0
))
;
WRITE_IMAGE
(
output,
(
int2
)(
X
*
UP_DIV
(
C,
C4NUM
)
+
num,
Y
)
,
tmp
)
; // NHWC4: H WC
FLT4
in_c4
=
READ_IMAGE
(
input,
smp_zero,
(
int2
)(
X,
Y
))
;
FLT4
tmp
=
in_c4
;
int
index
=
0
;
if
(
data_type
==
1
)
{
//
NC
index
=
X
;
}
else
if
(
data_type
==
2
)
{
//
NHWC4
index
=
X
%
C
;
}
else
{
//
NC4HW4
index
=
Y
/
H
;
}
tmp
+=
READ_IMAGE
(
alpha,
smp_zero,
(
int2
)(
index,
0
))
;
WRITE_IMAGE
(
output,
(
int2
)(
X,
Y
)
,
tmp
)
;
}
mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl
浏览文件 @
873e0342
...
...
@@ -4,27 +4,38 @@
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__kernel
void
PRelu
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
const
int4
input_shape,
__read_only
image2d_t
alpha,
const
int
dim
)
{
__read_only
image2d_t
alpha,
const
int
data_type,
const
int
bias_dim
)
{
int
H
=
input_shape.y
;
int
C
=
input_shape.w
; // channel size
C
=
UP_DIV
(
C,
SLICES
)
;
if
(
C
==
0
||
H
==
0
)
{
return
;
}
int
Y
=
get_global_id
(
0
)
; // height id
int
X
=
get_global_id
(
1
)
; // weight id
for
(
int
num
=
0
; num < UP_DIV(C, SLICES); ++num) {
FLT4
in_c4
=
READ_IMAGE
(
input,
smp_zero,
(
int2
)(
X
*
UP_DIV
(
C,
SLICES
)
+
num,
Y
))
; // NHWC4: H WC
FLT4
tmp
;
if
(
dim
==
1
)
{
FLT4
weight
=
READ_IMAGE
(
alpha,
smp_zero,
(
int2
)(
0
,
0
))
;
tmp.x
=
in_c4.x
>
0.0f
?
in_c4.x
:
in_c4.x
*
weight.x
;
tmp.y
=
in_c4.y
>
0.0f
?
in_c4.y
:
in_c4.y
*
weight.x
;
tmp.z
=
in_c4.z
>
0.0f
?
in_c4.z
:
in_c4.z
*
weight.x
;
tmp.w
=
in_c4.w
>
0.0f
?
in_c4.w
:
in_c4.w
*
weight.x
;
}
else
{
FLT4
weight
=
READ_IMAGE
(
alpha,
smp_zero,
(
int2
)(
num,
0
))
;
tmp.x
=
in_c4.x
>
0.0f
?
in_c4.x
:
in_c4.x
*
weight.x
;
tmp.y
=
in_c4.y
>
0.0f
?
in_c4.y
:
in_c4.y
*
weight.y
;
tmp.z
=
in_c4.z
>
0.0f
?
in_c4.z
:
in_c4.z
*
weight.z
;
tmp.w
=
in_c4.w
>
0.0f
?
in_c4.w
:
in_c4.w
*
weight.w
;
}
WRITE_IMAGE
(
output,
(
int2
)(
X
*
UP_DIV
(
C,
SLICES
)
+
num,
Y
)
,
tmp
)
; // NHWC4: H WC
FLT4
in_c4
=
READ_IMAGE
(
input,
smp_zero,
(
int2
)(
X,
Y
))
;
FLT4
tmp
;
int
index
=
0
;
if
(
data_type
==
1
)
{
//
NHWC4
index
=
X
%
C
;
}
else
if
(
data_type
==
2
)
{
//
NC4HW4
index
=
Y
/
H
;
}
else
{
return
;
}
if
(
bias_dim
==
1
)
{
index
=
0
;
}
FLT4
weight
=
READ_IMAGE
(
alpha,
smp_zero,
(
int2
)(
index,
0
))
;
FLT4
bias
=
weight
;
if
(
bias_dim
==
1
)
{
bias.y
=
weight.x
;
bias.z
=
weight.x
;
bias.w
=
weight.x
;
}
tmp.x
=
in_c4.x
>
0.0f
?
in_c4.x
:
in_c4.x
*
bias.x
;
tmp.y
=
in_c4.y
>
0.0f
?
in_c4.y
:
in_c4.y
*
bias.y
;
tmp.z
=
in_c4.z
>
0.0f
?
in_c4.z
:
in_c4.z
*
bias.z
;
tmp.w
=
in_c4.w
>
0.0f
?
in_c4.w
:
in_c4.w
*
bias.w
;
WRITE_IMAGE
(
output,
(
int2
)(
X,
Y
)
,
tmp
)
;
}
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
浏览文件 @
873e0342
...
...
@@ -77,20 +77,10 @@ int ActivationOpenClKernel::Init() {
std
::
set
<
std
::
string
>
build_options
;
ocl_runtime
->
LoadSource
(
Program_Kernel
[
type_
][
0
],
source
);
ocl_runtime
->
BuildKernel
(
kernel_
,
Program_Kernel
[
type_
][
0
],
Program_Kernel
[
type_
][
1
],
build_options
);
std
::
map
<
int
,
schema
::
Format
>
format
{{
4
,
schema
::
Format_NHWC4
},
{
2
,
schema
::
Format_NC4
}};
if
(
format
.
count
(
out_size_
)
==
0
)
{
MS_LOG
(
ERROR
)
<<
"Not found output tensor format"
;
return
RET_ERROR
;
}
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
in_tensors_
[
0
]
->
SetFormat
(
format
[
in_size_
]);
out_tensors_
[
0
]
->
SetFormat
(
format
[
out_size_
]);
if
(
in_size_
==
2
)
{
in_ori_format_
=
schema
::
Format_NC4
;
out_ori_format_
=
schema
::
Format_NC4
;
}
in_tensors_
[
0
]
->
SetFormat
(
op_format_
);
out_tensors_
[
0
]
->
SetFormat
(
op_format_
);
MS_LOG
(
DEBUG
)
<<
op_parameter_
->
name_
<<
" init Done!"
;
return
RET_OK
;
}
...
...
@@ -121,11 +111,15 @@ cl_int4 ActivationOpenClKernel::GetImg2dShape() {
for
(
int
i
=
0
;
i
<
in_size_
;
++
i
)
{
img2d_shape
.
s
[
i
+
4
-
in_size_
]
=
in_tensors_
[
0
]
->
shape
()[
i
];
}
if
(
in_size_
==
2
)
{
if
(
op_format_
==
schema
::
Format_NC4
)
{
img2d_shape
.
s
[
1
]
=
img2d_shape
.
s
[
2
];
img2d_shape
.
s
[
2
]
=
UP_DIV
(
img2d_shape
.
s
[
3
],
C4NUM
);
img2d_shape
.
s
[
3
]
=
C4NUM
;
}
if
(
op_format_
==
schema
::
Format_NC4HW4
)
{
img2d_shape
.
s
[
1
]
=
UP_DIV
(
img2d_shape
.
s
[
3
],
C4NUM
)
*
img2d_shape
.
s
[
1
];
// UP(c / 4) * H
img2d_shape
.
s
[
3
]
=
C4NUM
;
}
return
img2d_shape
;
}
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc
浏览文件 @
873e0342
...
...
@@ -54,6 +54,9 @@ void BiasAddOpenCLKernel::InitBuffer() {
int
BiasAddOpenCLKernel
::
Init
()
{
in_size_
=
in_tensors_
[
0
]
->
shape
().
size
();
out_size_
=
out_tensors_
[
0
]
->
shape
().
size
();
for
(
int
i
=
0
;
i
<
in_size_
;
++
i
)
{
input_shape_
.
s
[
i
+
4
-
in_size_
]
=
in_tensors_
[
0
]
->
shape
()[
i
];
}
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
enable_fp16_
=
ocl_runtime
->
GetFp16Enable
();
fp_size
=
enable_fp16_
?
sizeof
(
uint16_t
)
:
sizeof
(
float
);
...
...
@@ -77,33 +80,26 @@ int BiasAddOpenCLKernel::Init() {
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
std
::
map
<
int
,
schema
::
Format
>
format
{{
4
,
schema
::
Format_NHWC4
},
{
2
,
schema
::
Format_NC4
}};
if
(
format
.
count
(
out_size_
)
==
0
)
{
MS_LOG
(
ERROR
)
<<
"Not found output tensor format"
;
return
RET_ERROR
;
}
in_tensors_
[
0
]
->
SetFormat
(
format
[
in_size_
]);
out_tensors_
[
0
]
->
SetFormat
(
format
[
out_size_
]);
if
(
in_size_
==
2
)
{
in_ori_format_
=
format
[
in_size_
];
out_ori_format_
=
format
[
out_size_
];
}
in_tensors_
[
0
]
->
SetFormat
(
op_format_
);
out_tensors_
[
0
]
->
SetFormat
(
op_format_
);
MS_LOG
(
DEBUG
)
<<
program_name
<<
" Init Done!"
;
return
RET_OK
;
}
int
BiasAddOpenCLKernel
::
Run
()
{
cl_int4
input_shape
=
GetImg2dS
hape
();
cl_int4
global_size
=
GetGlobals
hape
();
MS_LOG
(
DEBUG
)
<<
op_parameter_
->
name_
<<
" Running!"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
int
arg_idx
=
0
;
std
::
map
<
schema
::
Format
,
int
>
data_type
{
{
schema
::
Format_NC4
,
1
},
{
schema
::
Format_NHWC4
,
2
},
{
schema
::
Format_NC4HW4
,
3
}};
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
input_shape
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
input_shape
_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
BiasAdd_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
in_size_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
data_type
[
op_format_
]
);
std
::
vector
<
size_t
>
local
=
{
1
,
1
};
std
::
vector
<
size_t
>
global
=
{
static_cast
<
size_t
>
(
input_shape
.
s
[
1
]),
static_cast
<
size_t
>
(
input_shap
e
.
s
[
2
])};
std
::
vector
<
size_t
>
global
=
{
static_cast
<
size_t
>
(
global_size
.
s
[
1
]),
static_cast
<
size_t
>
(
global_siz
e
.
s
[
2
])};
auto
ret
=
ocl_runtime
->
RunKernel
(
kernel_
,
global
,
local
,
nullptr
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Run kernel "
<<
op_parameter_
->
name_
<<
" error."
;
...
...
@@ -112,29 +108,29 @@ int BiasAddOpenCLKernel::Run() {
return
RET_OK
;
}
cl_int4
BiasAddOpenCLKernel
::
GetImg2dShape
()
{
cl_int4
img2d_shape
=
{
0
,
0
,
0
,
0
};
for
(
int
i
=
0
;
i
<
in_size_
;
++
i
)
{
img2d_shape
.
s
[
i
+
4
-
in_size_
]
=
in_tensors_
[
0
]
->
shape
()[
i
];
cl_int4
BiasAddOpenCLKernel
::
GetGlobalshape
()
{
cl_int4
global_shape
=
input_shape_
;
if
(
op_format_
==
schema
::
Format_NC4
)
{
global_shape
.
s
[
1
]
=
global_shape
.
s
[
2
];
global_shape
.
s
[
2
]
=
UP_DIV
(
global_shape
.
s
[
3
],
C4NUM
);
}
if
(
in_size_
==
2
)
{
img2d_shape
.
s
[
1
]
=
img2d_shape
.
s
[
2
];
img2d_shape
.
s
[
2
]
=
UP_DIV
(
img2d_shape
.
s
[
3
],
C4NUM
);
img2d_shape
.
s
[
3
]
=
C4NUM
;
if
(
op_format_
==
schema
::
Format_NC4HW4
)
{
global_shape
.
s
[
1
]
=
UP_DIV
(
global_shape
.
s
[
3
],
C4NUM
)
*
global_shape
.
s
[
1
];
// c / 4 * H
}
return
img2d_shape
;
if
(
op_format_
==
schema
::
Format_NHWC4
)
{
global_shape
.
s
[
2
]
=
UP_DIV
(
global_shape
.
s
[
3
],
C4NUM
)
*
global_shape
.
s
[
2
];
}
return
global_shape
;
}
int
BiasAddOpenCLKernel
::
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
{
cl_int4
img_shape
=
GetImg2dShape
();
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
cl_int4
img_shape
=
GetGlobalshape
();
size_t
img_dtype
=
CL_FLOAT
;
#endif
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
img_size
->
clear
();
img_size
->
push_back
(
img_shape
.
s
[
2
]
*
UP_DIV
(
img_shape
.
s
[
3
],
C4NUM
)
);
img_size
->
push_back
(
img_shape
.
s
[
2
]);
img_size
->
push_back
(
img_shape
.
s
[
1
]);
img_size
->
push_back
(
img_dtype
);
return
RET_OK
;
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h
浏览文件 @
873e0342
...
...
@@ -38,7 +38,7 @@ class BiasAddOpenCLKernel : public OpenCLKernel {
int
Run
()
override
;
int
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
override
;
void
InitBuffer
();
cl_int4
Get
Img2dS
hape
();
cl_int4
Get
Globals
hape
();
private:
cl
::
Kernel
kernel_
;
...
...
@@ -46,6 +46,7 @@ class BiasAddOpenCLKernel : public OpenCLKernel {
int
in_size_
;
int
out_size_
;
size_t
fp_size
;
cl_int4
input_shape_
;
bool
enable_fp16_
{
false
};
};
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
浏览文件 @
873e0342
...
...
@@ -18,6 +18,7 @@
#include <set>
#include <vector>
#include <map>
#include "src/kernel_registry.h"
#include "include/errorcode.h"
...
...
@@ -62,6 +63,9 @@ int PReluOpenCLKernel::Init() {
<<
C_Weight
<<
" and your input channel size is "
<<
C
;
return
RET_ERROR
;
}
for
(
int
i
=
0
;
i
<
in_tensors_
[
0
]
->
shape
().
size
();
++
i
)
{
input_shape_
.
s
[
i
]
=
in_tensors_
[
0
]
->
shape
()[
i
];
}
std
::
set
<
std
::
string
>
build_options
;
std
::
string
source
=
prelu_source
;
std
::
string
program_name
=
"PRelu"
;
...
...
@@ -73,31 +77,26 @@ int PReluOpenCLKernel::Init() {
ocl_runtime
->
LoadSource
(
program_name
,
source
);
ocl_runtime
->
BuildKernel
(
kernel_
,
program_name
,
kernel_name
,
build_options
);
in_ori_format_
=
in_tensors_
[
0
]
->
GetFormat
();
in_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
in_tensors_
[
0
]
->
SetFormat
(
op_format_
);
out_ori_format_
=
out_tensors_
[
0
]
->
GetFormat
();
out_tensors_
[
0
]
->
SetFormat
(
schema
::
Format_NHWC4
);
out_tensors_
[
0
]
->
SetFormat
(
op_format_
);
MS_LOG
(
DEBUG
)
<<
program_name
<<
" init Done!"
;
return
RET_OK
;
}
int
PReluOpenCLKernel
::
Run
()
{
MS_LOG
(
DEBUG
)
<<
op_parameter_
->
name_
<<
" Running!"
;
int
N
=
in_tensors_
[
0
]
->
shape
()[
0
];
int
H
=
in_tensors_
[
0
]
->
shape
()[
1
];
int
W
=
in_tensors_
[
0
]
->
shape
()[
2
];
int
C
=
in_tensors_
[
0
]
->
shape
()[
3
];
cl_int4
input_shape
=
{
N
,
H
,
W
,
C
};
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
std
::
map
<
schema
::
Format
,
int
>
data_type
{{
schema
::
Format_NHWC4
,
1
},
{
schema
::
Format_NC4HW4
,
2
}};
int
arg_idx
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
input_shape
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
input_shape
_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
PReluWeight_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
data_type
[
op_format_
]);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_idx
++
,
reinterpret_cast
<
int
>
(
in_tensors_
[
1
]
->
shape
()[
0
]));
std
::
vector
<
size_t
>
local
=
{
1
,
1
};
std
::
vector
<
size_t
>
global
=
{
static_cast
<
size_t
>
(
H
),
static_cast
<
size_t
>
(
W
)};
std
::
vector
<
size_t
>
global
=
{
static_cast
<
size_t
>
(
global_shape_
.
s
[
1
]),
static_cast
<
size_t
>
(
global_shape_
.
s
[
2
]
)};
auto
ret
=
ocl_runtime
->
RunKernel
(
kernel_
,
global
,
local
,
nullptr
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Run kernel "
<<
op_parameter_
->
name_
<<
" error."
;
...
...
@@ -107,19 +106,22 @@ int PReluOpenCLKernel::Run() {
}
int
PReluOpenCLKernel
::
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
{
int
H
=
in_tensors_
[
0
]
->
shape
()[
1
];
int
W
=
in_tensors_
[
0
]
->
shape
()[
2
];
int
C
=
in_tensors_
[
0
]
->
shape
()[
3
];
#ifdef ENABLE_FP16
size_t
img_dtype
=
CL_HALF_FLOAT
;
#else
size_t
img_dtype
=
CL_FLOAT
;
#endif
if
(
enable_fp16_
)
{
img_dtype
=
CL_HALF_FLOAT
;
}
global_shape_
=
input_shape_
;
if
(
op_format_
==
schema
::
Format_NC4HW4
)
{
global_shape_
.
s
[
1
]
=
UP_DIV
(
input_shape_
.
s
[
3
],
C4NUM
)
*
input_shape_
.
s
[
1
];
}
else
if
(
op_format_
==
schema
::
Format_NHWC4
)
{
global_shape_
.
s
[
2
]
=
UP_DIV
(
input_shape_
.
s
[
3
],
C4NUM
)
*
input_shape_
.
s
[
2
];
}
else
{
MS_LOG
(
ERROR
)
<<
"op_format_:"
<<
op_format_
<<
" is do not support!"
;
return
RET_ERROR
;
}
img_size
->
clear
();
img_size
->
push_back
(
W
*
UP_DIV
(
C
,
C4NUM
)
);
img_size
->
push_back
(
H
);
img_size
->
push_back
(
global_shape_
.
s
[
2
]
);
img_size
->
push_back
(
global_shape_
.
s
[
1
]
);
img_size
->
push_back
(
img_dtype
);
return
RET_OK
;
}
...
...
@@ -128,7 +130,7 @@ kernel::LiteKernel *OpenCLPReluKernelCreator(const std::vector<lite::tensor::Ten
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
OpParameter
*
opParameter
,
const
lite
::
Context
*
ctx
,
const
kernel
::
KernelKey
&
desc
,
const
lite
::
PrimitiveC
*
primitive
)
{
if
(
inputs
.
size
()
==
0
)
{
if
(
inputs
.
empty
()
)
{
MS_LOG
(
ERROR
)
<<
"Input data size must be greater than 0, but your size is "
<<
inputs
.
size
();
return
nullptr
;
}
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
浏览文件 @
873e0342
...
...
@@ -41,6 +41,8 @@ class PReluOpenCLKernel : public OpenCLKernel {
private:
cl
::
Kernel
kernel_
;
void
*
PReluWeight_
;
cl_int4
input_shape_
;
cl_int4
global_shape_
;
size_t
fp_size
;
bool
enable_fp16_
{
false
};
};
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
浏览文件 @
873e0342
...
...
@@ -51,7 +51,7 @@ void CompareRes(lite::tensor::Tensor *output_tensor, const std::string &standard
auto
*
output_data
=
reinterpret_cast
<
T
*>
(
output_tensor
->
Data
());
size_t
output_size
=
output_tensor
->
Size
();
auto
expect_data
=
reinterpret_cast
<
T
*>
(
mindspore
::
lite
::
ReadFile
(
standard_answer_file
.
c_str
(),
&
output_size
));
constexpr
float
atol
=
0.00
02
;
constexpr
float
atol
=
0.00
1
;
for
(
int
i
=
0
;
i
<
output_tensor
->
ElementsNum
();
++
i
)
{
if
(
std
::
fabs
(
output_data
[
i
]
-
expect_data
[
i
])
>
atol
)
{
printf
(
"error at idx[%d] expect=%f output=%f
\n
"
,
i
,
expect_data
[
i
],
output_data
[
i
]);
...
...
@@ -88,10 +88,8 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
bool
enable_fp16
=
ocl_runtime
->
GetFp16Enable
();
MS_LOG
(
INFO
)
<<
"Init tensors."
;
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
schema
::
Format
format
=
schema
::
Format_NHWC
;
if
(
input_shape
.
size
()
==
2
)
{
format
=
schema
::
Format_NC
;
}
schema
::
Format
format
=
schema
::
Format_NC
;
schema
::
Format
op_format
=
schema
::
Format_NC4
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
auto
*
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
format
,
tensor_type
);
if
(
input_tensor
==
nullptr
)
{
...
...
@@ -124,6 +122,7 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
param
->
type_
=
ActivationType_RELU
;
auto
*
kernel
=
new
(
std
::
nothrow
)
kernel
::
ActivationOpenClKernel
(
reinterpret_cast
<
OpParameter
*>
(
param
),
inputs
,
outputs
);
kernel
->
SetFormatType
(
op_format
);
if
(
kernel
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Kernel:Relu create fail."
;
delete
param
;
...
...
@@ -194,17 +193,15 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
std
::
string
out_file
=
"/data/local/tmp/relu6.bin"
;
MS_LOG
(
INFO
)
<<
"Relu6 Begin test!"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
data_type
=
kNumberTypeFloat
32
;
auto
data_type
=
kNumberTypeFloat
16
;
ocl_runtime
->
SetFp16Enable
(
data_type
==
kNumberTypeFloat16
);
bool
enable_fp16
=
ocl_runtime
->
GetFp16Enable
();
ocl_runtime
->
Init
();
MS_LOG
(
INFO
)
<<
"Init tensors."
;
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
schema
::
Format
format
=
schema
::
Format_NHWC
;
if
(
input_shape
.
size
()
==
2
)
{
format
=
schema
::
Format_NC
;
}
schema
::
Format
format
=
schema
::
Format_NC
;
schema
::
Format
op_format
=
schema
::
Format_NC4
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
auto
*
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
format
,
tensor_type
);
if
(
input_tensor
==
nullptr
)
{
...
...
@@ -246,6 +243,7 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
delete
output_tensor
;
return
;
}
kernel
->
SetFormatType
(
op_format
);
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
delete
param
;
...
...
@@ -311,16 +309,14 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
MS_LOG
(
INFO
)
<<
"Sigmoid Begin test!"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
data_type
=
kNumberTypeFloat
16
;
auto
data_type
=
kNumberTypeFloat
32
;
ocl_runtime
->
SetFp16Enable
(
data_type
==
kNumberTypeFloat16
);
bool
enable_fp16
=
ocl_runtime
->
GetFp16Enable
();
MS_LOG
(
INFO
)
<<
"Init tensors."
;
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
schema
::
Format
format
=
schema
::
Format_NHWC
;
if
(
input_shape
.
size
()
==
2
)
{
format
=
schema
::
Format_NC
;
}
schema
::
Format
format
=
schema
::
Format_NC
;
schema
::
Format
op_format
=
schema
::
Format_NC4
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
auto
*
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
format
,
tensor_type
);
if
(
input_tensor
==
nullptr
)
{
...
...
@@ -362,6 +358,7 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
delete
output_tensor
;
return
;
}
kernel
->
SetFormatType
(
op_format
);
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
delete
param
;
...
...
@@ -427,17 +424,15 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
MS_LOG
(
INFO
)
<<
"Leaky relu Begin test!"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
data_type
=
kNumberTypeFloat
32
;
auto
data_type
=
kNumberTypeFloat
16
;
// need modify
ocl_runtime
->
SetFp16Enable
(
data_type
==
kNumberTypeFloat16
);
bool
enable_fp16
=
ocl_runtime
->
GetFp16Enable
();
MS_LOG
(
INFO
)
<<
"Init tensors."
;
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
// need modify
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
schema
::
Format
format
=
schema
::
Format_NHWC
;
if
(
input_shape
.
size
()
==
2
)
{
format
=
schema
::
Format_NC
;
}
schema
::
Format
format
=
schema
::
Format_NC
;
// need modify
schema
::
Format
op_format
=
schema
::
Format_NC4
;
// need modify
auto
*
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
format
,
tensor_type
);
if
(
input_tensor
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new input tensor error!"
;
...
...
@@ -479,6 +474,7 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
delete
output_tensor
;
return
;
}
kernel
->
SetFormatType
(
op_format
);
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
delete
param
;
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc
浏览文件 @
873e0342
...
...
@@ -77,20 +77,18 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
MS_LOG
(
INFO
)
<<
"BiasAdd Begin test:"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
auto
data_type
=
kNumberTypeFloat16
;
auto
data_type
=
kNumberTypeFloat16
;
// need modify
ocl_runtime
->
SetFp16Enable
(
data_type
==
kNumberTypeFloat16
);
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
std
::
vector
<
int
>
output_shape
=
{
1
,
9
};
std
::
vector
<
int
>
input_shape
=
{
1
,
9
};
// need modify
std
::
vector
<
int
>
output_shape
=
{
1
,
9
};
// need modify
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
schema
::
Format
type
;
schema
::
Format
type
=
schema
::
Format_NC
;
// need modify
schema
::
Format
op_format
=
schema
::
Format_NC4
;
// need modify
int
weight_shape
=
0
;
if
(
input_shape
.
size
()
==
4
)
{
weight_shape
=
input_shape
[
3
];
type
=
schema
::
Format_NHWC
;
}
else
{
weight_shape
=
input_shape
[
1
];
type
=
schema
::
Format_NC
;
}
auto
*
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
type
,
tensor_type
);
if
(
input_tensor
==
nullptr
)
{
...
...
@@ -144,7 +142,7 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete
param
;
return
;
}
biasadd_kernel
->
SetFormatType
(
op_format
);
auto
ret
=
biasadd_kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"biasadd kernel init error."
;
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc
浏览文件 @
873e0342
...
...
@@ -85,14 +85,15 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
std
::
vector
<
int
>
input_shape
=
{
1
,
4
,
3
,
9
};
auto
data_type
=
kNumberTypeFloat16
;
ocl_runtime
->
SetFp16Enable
(
data_type
==
kNumberTypeFloat16
);
schema
::
Format
format
=
schema
::
Format_NHWC
;
schema
::
Format
op_format
=
schema
::
Format_NC4HW4
;
auto
tensor_type
=
schema
::
NodeType_ValueNode
;
auto
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
schema
::
Format_NHWC
,
tensor_type
);
auto
input_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
format
,
tensor_type
);
if
(
input_tensor
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new input_tensor error!"
;
return
;
}
auto
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
schema
::
Format_NHWC
,
tensor_type
);
auto
output_tensor
=
new
(
std
::
nothrow
)
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
format
,
tensor_type
);
if
(
output_tensor
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new output_tensor error"
;
delete
input_tensor
;
...
...
@@ -140,6 +141,7 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete
param
;
return
;
}
prelu_kernel
->
SetFormatType
(
op_format
);
auto
ret
=
prelu_kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Init prelu kernel error"
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录