Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
5ffb9588
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5ffb9588
编写于
8月 28, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 28, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5375 add print info to opencl GPU subgraph
Merge pull request !5375 from wandongdong/master
上级
c006e7b9
e06e9c71
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
94 addition
and
70 deletion
+94
-70
mindspore/lite/src/lite_session.cc
mindspore/lite/src/lite_session.cc
+1
-0
mindspore/lite/src/runtime/kernel/opencl/cl/depthwise_conv2d.cl
...ore/lite/src/runtime/kernel/opencl/cl/depthwise_conv2d.cl
+24
-20
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
...lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
+2
-2
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
...pore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
+14
-14
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
...lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
+16
-12
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+2
-2
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
+4
-2
mindspore/lite/src/runtime/opencl/opencl_runtime.cc
mindspore/lite/src/runtime/opencl/opencl_runtime.cc
+10
-9
mindspore/lite/src/scheduler.cc
mindspore/lite/src/scheduler.cc
+12
-2
mindspore/lite/src/scheduler.h
mindspore/lite/src/scheduler.h
+2
-1
mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
...st/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
+7
-6
未找到文件。
mindspore/lite/src/lite_session.cc
浏览文件 @
5ffb9588
...
...
@@ -273,6 +273,7 @@ int LiteSession::Init(Context *context) {
if
(
context_
->
device_ctx_
.
type
==
DT_GPU
)
{
auto
opencl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
opencl_runtime
->
Init
();
MS_LOG
(
INFO
)
<<
"Init OpenCL runtime."
;
}
#endif
executor
=
new
Executor
();
...
...
mindspore/lite/src/runtime/kernel/opencl/cl/depthwise_conv2d.cl
浏览文件 @
5ffb9588
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__constant
sampler_t
smp_zero
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
__kernel
void
DepthwiseConv2d_IMG_NC4HW4
(
__read_only
image2d_t
src_data,
__global
FLT4
*filter,
__global
FLT4
*bias,
float
relu_clip,
__write_only
image2d_t
dst_data,
int2
kernel_size,
int2
stride,
int2
padding,
int2
dilation,
int4
src_size,
int4
dst_size
)
{
__write_only
image2d_t
dst_data,
int2
kernel_size,
int2
stride,
int2
padding,
int2
dilation,
int4
src_size,
int4
dst_size,
float
relu_clip_min,
float
relu_clip_max
)
{
int
X
=
get_global_id
(
0
)
;
int
Y
=
get_global_id
(
1
)
;
int
Z
=
get_global_id
(
2
)
;
...
...
@@ -27,13 +28,14 @@ __kernel void DepthwiseConv2d_IMG_NC4HW4(__read_only image2d_t src_data, __globa
}
FLT4 bias_p = bias[Z];
FLT4 res = TO_FLT4(r) + bias_p;
res = clamp(res, (FLT)(
0.0f), (FLT)(relu_clip
));
res = clamp(res, (FLT)(
relu_clip_min), (FLT)(relu_clip_max
));
WRITE_IMAGE(dst_data, (int2)(X, (Z * dst_size.y + Y)), res);
}
__kernel void DepthwiseConv2d_IMG_NHWC4(__read_only image2d_t src_data, __global FLT4 *filter, __global FLT4 *bias,
float relu_clip, __write_only image2d_t dst_data, int2 kernel_size,
int2 stride, int2 padding, int2 dilation, int4 src_size, int4 dst_size) {
__write_only image2d_t dst_data, int2 kernel_size,
int2 stride, int2 padding, int2 dilation, int4 src_size, int4 dst_size,
float relu_clip_min, float relu_clip_max) {
int X = get_global_id(0);
int Y = get_global_id(1);
int Z = get_global_id(2);
...
...
@@ -58,13 +60,14 @@ __kernel void DepthwiseConv2d_IMG_NHWC4(__read_only image2d_t src_data, __global
}
FLT4 bias_p = bias[Z];
FLT4 res = TO_FLT4(r) + bias_p;
res = clamp(res, (FLT)(
0.0f), (FLT)(relu_clip
));
res = clamp(res, (FLT)(
relu_clip_min), (FLT)(relu_clip_max
));
WRITE_IMAGE(dst_data, (int2)(X * dst_size.z + Z, Y), res);
}
__kernel void DepthwiseConv2d_IMG_NHWC4_1x1(__read_only image2d_t src_data, __global FLT4 *filter, __global FLT4 *bias,
float relu_clip, __write_only image2d_t dst_data, int2 kernel_size,
int2 stride, int2 padding, int2 dilation, int4 src_size, int4 dst_size) {
__write_only image2d_t dst_data, int2 kernel_size, int2 stride,
int2 padding, int2 dilation, int4 src_size, int4 dst_size,
float relu_clip_min, float relu_clip_max) {
int X = get_global_id(0);
int Y = get_global_id(1);
int Z = get_global_id(2);
...
...
@@ -81,7 +84,6 @@ __kernel void DepthwiseConv2d_IMG_NHWC4_1x1(__read_only image2d_t src_data, __gl
bool outside_x = x_c < 0 || x_c >= src_size.x;
if (!outside_x && !outside_y) {
FLT4 flt_p = filter[fx_c];
// FLT4 src_p =src_data[((y_c * src_size.x + x_c) * src_size.z + Z)];
FLT4 src_p = READ_IMAGE(src_data, smp_zero, (int2)(Z, (y_c * src_size.x + x_c) * src_size.z));
r += TO_FLT4(src_p * flt_p);
}
...
...
@@ -89,13 +91,13 @@ __kernel void DepthwiseConv2d_IMG_NHWC4_1x1(__read_only image2d_t src_data, __gl
}
FLT4 bias_p = bias[Z];
FLT4 res = TO_FLT4(r) + bias_p;
res = clamp(res, (FLT)(0.0f), (FLT)(relu_clip));
// dst_data[((Y * dst_size.x + X) * dst_size.z + Z)] = res;
res = clamp(res, (FLT)(relu_clip_min), (FLT)(relu_clip_max));
WRITE_IMAGE(dst_data, (int2)(Z, (Y * dst_size.x + X) * dst_size.z), res);
}
__kernel void DepthwiseConv2d_BUF_NC4HW4(__global FLT4 *src_data, __global FLT4 *filter, __global FLT4 *bias,
float relu_clip, __global FLT4 *dst_data, int2 kernel_size, int2 stride,
int2 padding, int2 dilation, int4 src_size, int4 dst_size) {
__global FLT4 *dst_data, int2 kernel_size, int2 stride,
int2 padding, int2 dilation, int4 src_size, int4 dst_size,
float relu_clip_min, float relu_clip_max) {
int X = get_global_id(0);
int Y = get_global_id(1);
int Z = get_global_id(2);
...
...
@@ -120,13 +122,14 @@ __kernel void DepthwiseConv2d_BUF_NC4HW4(__global FLT4 *src_data, __global FLT4
}
FLT4 bias_p = bias[Z];
FLT4 res = TO_FLT4(r) + bias_p;
res = clamp(res, (FLT)(
0.0f), (FLT)(relu_clip
));
res = clamp(res, (FLT)(
relu_clip_min), (FLT)(relu_clip_max
));
dst_data[(((Z)*dst_size.y + (Y)) * dst_size.x + (X))] = res;
}
__kernel void DepthwiseConv2d_BUF_NHWC4(__global FLT4 *src_data, __global FLT4 *filter, __global FLT4 *bias,
float relu_clip, __global FLT4 *dst_data, int2 kernel_size, int2 stride,
int2 padding, int2 dilation, int4 src_size, int4 dst_size) {
__global FLT4 *dst_data, int2 kernel_size, int2 stride,
int2 padding, int2 dilation, int4 src_size, int4 dst_size,
float relu_clip_min, float relu_clip_max) {
int X = get_global_id(0);
int Y = get_global_id(1);
int Z = get_global_id(2);
...
...
@@ -151,13 +154,14 @@ __kernel void DepthwiseConv2d_BUF_NHWC4(__global FLT4 *src_data, __global FLT4 *
}
FLT4 bias_p = bias[Z];
FLT4 res = TO_FLT4(r) + bias_p;
res = clamp(res, (FLT)(
0.0f), (FLT)(relu_clip
));
res = clamp(res, (FLT)(
relu_clip_min), (FLT)(relu_clip_max
));
dst_data[((Y * dst_size.x + X) * dst_size.z + Z)] = res;
}
__kernel void DepthwiseConv2d_BUF_NHWC4_1x1(__global FLT4 *src_data, __global FLT4 *filter, __global FLT4 *bias,
float relu_clip, __global FLT4 *dst_data, int2 kernel_size, int2 stride,
int2 padding, int2 dilation, int4 src_size, int4 dst_size) {
__global FLT4 *dst_data, int2 kernel_size, int2 stride,
int2 padding, int2 dilation, int4 src_size, int4 dst_size,
float relu_clip_min, float relu_clip_max) {
int X = get_global_id(0);
int Y = get_global_id(1);
int Z = get_global_id(2);
...
...
@@ -181,6 +185,6 @@ __kernel void DepthwiseConv2d_BUF_NHWC4_1x1(__global FLT4 *src_data, __global FL
}
FLT4
bias_p
=
bias[Z]
;
FLT4
res
=
TO_FLT4
(
r
)
+
bias_p
;
res
=
clamp
(
res,
(
FLT
)(
0.0f
)
,
(
FLT
)(
relu_clip
))
;
res
=
clamp
(
res,
(
FLT
)(
relu_clip_min
)
,
(
FLT
)(
relu_clip_max
))
;
dst_data[
((
Y
*
dst_size.x
+
X
)
*
dst_size.z
+
Z
)
]
=
res
;
}
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
浏览文件 @
5ffb9588
...
...
@@ -186,8 +186,8 @@ int Conv2dTransposeOpenCLKernel::Run() {
cl_int4
dst_size
=
{
oh
,
ow
,
UP_DIV
(
co
,
C4NUM
),
1
};
int
arg_cnt
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
padWeight_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
bias_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
padWeight_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
bias_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
kernel_size
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
stride
);
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
浏览文件 @
5ffb9588
...
...
@@ -254,34 +254,34 @@ int ConvolutionOpenCLKernel::Run() {
arg_cn
=
0
;
cl_int4
_4x4to36_in_shape
=
{
1
,
IH
,
IW
,
CI_SLICES
};
cl_int4
_4x4to36_out_shape
=
{
1
,
36
,
TILES_XY
,
CI_SLICES
};
ocl_runtime
->
SetKernelArg
(
kernel_4x4to36
,
arg_cn
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_4x4to36
,
arg_cn
++
,
winograd_mem0_
);
ocl_runtime
->
SetKernelArg
(
kernel_4x4to36
,
arg_cn
++
,
in_tensors_
[
0
]
->
Data
()
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_4x4to36
,
arg_cn
++
,
winograd_mem0_
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_4x4to36
,
arg_cn
++
,
_4x4to36_in_shape
);
ocl_runtime
->
SetKernelArg
(
kernel_4x4to36
,
arg_cn
++
,
_4x4to36_out_shape
);
arg_cn
=
0
;
cl_int4
conv_in_shape
=
{
1
,
36
,
TILES_XY
,
CI_SLICES
};
cl_int4
conv_out_shape
=
{
1
,
36
,
TILES_XY
,
CO_SLICES
};
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
winograd_mem0_
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
winograd_mem1_
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
packed_weight_
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
winograd_mem0_
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
winograd_mem1_
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
packed_weight_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
conv_in_shape
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
conv_out_shape
);
arg_cn
=
0
;
cl_int4
_36to4x4_in_shape
=
{
1
,
16
,
TILES_XY
,
CO_SLICES
};
cl_int4
_36to4x4_out_shape
=
{
1
,
OH
,
OW
,
CO_SLICES
};
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
winograd_mem1_
);
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
packed_bias_
);
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
winograd_mem1_
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
out_tensors_
[
0
]
->
Data
()
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
packed_bias_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
_36to4x4_in_shape
);
ocl_runtime
->
SetKernelArg
(
kernel_36to4x4
,
arg_cn
++
,
_36to4x4_out_shape
);
}
else
{
arg_cn
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
packed_weight_
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
packed_bias_
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
in_tensors_
[
0
]
->
Data
()
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
out_tensors_
[
0
]
->
Data
()
,
lite
::
opencl
::
MemType
::
IMG
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
packed_weight_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_conv
,
arg_cn
++
,
packed_bias_
,
lite
::
opencl
::
MemType
::
BUF
);
}
if
(
use_winograd_
)
{
...
...
@@ -594,9 +594,9 @@ std::string ConvolutionOpenCLKernel::CodeGenWinograd36To4x4() {
auto
param
=
reinterpret_cast
<
ConvParameter
*>
(
op_parameter_
);
if
(
param
->
act_type_
==
ActType_Relu
)
{
code
+=
" acc = max(acc, (
float
4)(0.0f));
\n
"
;
code
+=
" acc = max(acc, (
FLT
4)(0.0f));
\n
"
;
}
else
if
(
param
->
act_type_
==
ActType_Relu6
)
{
code
+=
" acc = clamp(acc, (
float4)(0.0f), (float
4)(6.0f));
\n
"
;
code
+=
" acc = clamp(acc, (
FLT4)(0.0f), (FLT
4)(6.0f));
\n
"
;
}
code
+=
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
浏览文件 @
5ffb9588
...
...
@@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/opencl/kernel/depthwise_conv2d.h"
#include <float.h>
#include <string>
#include <set>
#include <utility>
...
...
@@ -180,7 +181,8 @@ int DepthwiseConv2dOpenCLKernel::Run() {
std
::
vector
<
size_t
>
local
;
GetLocalSize
(
0
,
global
,
&
local
);
float
relu_clip1
=
6.0
;
std
::
map
<
ActType
,
std
::
pair
<
float
,
float
>>
relu_clips
{
{
ActType_No
,
{
FLT_MIN
,
FLT_MAX
}},
{
ActType_Relu
,
{
0.0
,
FLT_MAX
}},
{
ActType_Relu6
,
{
0
,
6.0
}}};
cl_int2
kernel_size
=
{
parameter
->
kernel_h_
,
parameter
->
kernel_w_
};
cl_int2
stride
=
{
parameter
->
stride_h_
,
parameter
->
stride_w_
};
cl_int2
padding
=
{
-
parameter
->
pad_u_
,
-
parameter
->
pad_l_
};
...
...
@@ -189,17 +191,19 @@ int DepthwiseConv2dOpenCLKernel::Run() {
cl_int4
dst_size
=
{(
cl_int
)
out_tensors_
[
0
]
->
Width
(),
(
cl_int
)
out_tensors_
[
0
]
->
Height
(),
(
cl_int
)
CO4
,
(
cl_int
)
out_tensors_
[
0
]
->
Batch
()};
ocl_runtime
->
SetKernelArg
(
kernel_
,
1
,
packed_weight_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
2
,
bias_data_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
3
,
relu_clip1
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
5
,
kernel_size
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
6
,
stride
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
7
,
padding
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
8
,
dilation
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
9
,
src_size
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
10
,
dst_size
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
0
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
4
,
out_tensors_
[
0
]
->
Data
());
int
arg_cnt
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
packed_weight_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
bias_data_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
kernel_size
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
stride
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
padding
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
dilation
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
src_size
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
dst_size
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
relu_clips
[
parameter
->
act_type_
].
first
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_cnt
++
,
relu_clips
[
parameter
->
act_type_
].
second
);
ocl_runtime
->
RunKernel
(
kernel_
,
global
,
local
,
nullptr
);
return
RET_OK
;
}
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
浏览文件 @
5ffb9588
...
...
@@ -163,8 +163,8 @@ int MatMulOpenCLKernel::Run() {
std
::
vector
<
size_t
>
global
=
{
UP_ROUND
(
sizeCO
.
s
[
1
],
local
[
0
]),
4
};
int
arg_count
=
0
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
padWeight_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
bias_
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
padWeight_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
bias_
,
lite
::
opencl
::
MemType
::
BUF
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
out_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
sizeCI
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
arg_count
++
,
sizeCO
);
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
浏览文件 @
5ffb9588
...
...
@@ -147,8 +147,10 @@ int ToFormatOpenCLKernel::Run() {
cl_int4
shape
{(
cl_int
)
nhwc_shape_
[
0
],
(
cl_int
)
nhwc_shape_
[
1
],
(
cl_int
)
nhwc_shape_
[
2
],
(
cl_int
)
nhwc_shape_
[
3
]};
cl_int4
gsize
{(
cl_int
)
global
[
0
],
(
cl_int
)
global
[
1
],
(
cl_int
)
global
[
2
],
1
};
ocl_runtime
->
SetKernelArg
(
kernel_
,
0
,
in_tensors_
[
0
]
->
Data
());
ocl_runtime
->
SetKernelArg
(
kernel_
,
1
,
out_tensors_
[
0
]
->
Data
());
auto
src_mem_type
=
(
out_mem_type_
==
OpenCLMemType
::
IMG
)
?
lite
::
opencl
::
MemType
::
BUF
:
lite
::
opencl
::
MemType
::
IMG
;
auto
dst_mem_type
=
(
out_mem_type_
==
OpenCLMemType
::
IMG
)
?
lite
::
opencl
::
MemType
::
IMG
:
lite
::
opencl
::
MemType
::
BUF
;
ocl_runtime
->
SetKernelArg
(
kernel_
,
0
,
in_tensors_
[
0
]
->
Data
(),
src_mem_type
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
1
,
out_tensors_
[
0
]
->
Data
(),
dst_mem_type
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
2
,
gsize
);
ocl_runtime
->
SetKernelArg
(
kernel_
,
3
,
shape
);
ocl_runtime
->
RunKernel
(
kernel_
,
global
,
local
,
nullptr
);
...
...
mindspore/lite/src/runtime/opencl/opencl_runtime.cc
浏览文件 @
5ffb9588
...
...
@@ -301,20 +301,23 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
// fp16 enable, kernel will use half and read_imageh and write_imageh.
build_options_str
=
"-DFLT=half -DFLT4=half4 -DFLT16=half16 "
"-DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT4=convert_half4"
;
"-DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT4=convert_half4
"
;
}
else
{
// fp16 not enable, kernel will use float and read_imagef and write_imagef.
build_options_str
=
"-DFLT=float -DFLT4=float4 -DFLT16=float16 "
"-DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef -DTO_FLT4=convert_float4"
;
"-DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef -DTO_FLT4=convert_float4
"
;
}
build_options_str
=
std
::
accumulate
(
build_options
.
begin
(),
build_options
.
end
(),
build_options_str
,
[](
const
std
::
string
&
options
,
const
std
::
string
&
option
)
->
std
::
string
{
return
options
+
" "
+
option
;
});
auto
build_options_ext
=
std
::
accumulate
(
build_options
.
begin
(),
build_options
.
end
(),
std
::
string
(
""
),
[](
const
std
::
string
&
options
,
const
std
::
string
&
option
)
->
std
::
string
{
auto
res
=
options
+
" "
+
option
;
return
res
;
});
build_options_str
+=
default_build_opts_
;
// program identifier = program_name + build_options
std
::
string
build_program_key
=
program_name
+
build_options_str
;
std
::
string
build_program_key
=
program_name
+
build_options_str
+
build_options_ext
;
auto
build_program_it
=
program_map_
.
find
(
build_program_key
);
cl
::
Program
program
;
...
...
@@ -435,9 +438,7 @@ GpuInfo OpenCLRuntime::ParseGpuInfo(std::string device_name, std::string device_
bool
OpenCLRuntime
::
LoadSource
(
const
std
::
string
&
program_name
,
const
std
::
string
&
source
)
{
auto
it_source
=
g_opencl_program_map
.
find
(
program_name
);
if
(
it_source
!=
g_opencl_program_map
.
end
())
{
it_source
->
second
=
source
;
}
else
{
if
(
it_source
==
g_opencl_program_map
.
end
())
{
g_opencl_program_map
.
emplace
(
program_name
,
source
);
}
return
true
;
...
...
mindspore/lite/src/scheduler.cc
浏览文件 @
5ffb9588
...
...
@@ -16,6 +16,8 @@
#include "src/scheduler.h"
#include <vector>
#include <string>
#include <algorithm>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/common/graph_util.h"
...
...
@@ -140,7 +142,7 @@ int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<tensor::Tenso
outputs
.
emplace_back
(
tensors
->
at
(
size_t
(
outIndexes
->
GetAs
<
uint32_t
>
(
j
))));
}
auto
*
primitive
=
model
->
GetOp
(
cNode
->
name
()
->
str
());
auto
*
kernel
=
this
->
ScheduleNode
(
inputs
,
outputs
,
primitive
);
auto
*
kernel
=
this
->
ScheduleNode
(
inputs
,
outputs
,
primitive
,
cNode
);
if
(
nullptr
==
kernel
)
{
MS_LOG
(
ERROR
)
<<
"ScheduleNode return nullptr, name: "
<<
cNode
->
name
()
->
str
()
<<
", type: "
<<
schema
::
EnumNamePrimitiveType
(
cNode
->
primitive
()
->
value_type
());
...
...
@@ -176,6 +178,7 @@ void Scheduler::ConstructSubgraphs(std::vector<kernel::LiteKernel *> *kernels) {
}
std
::
vector
<
kernel
::
LiteKernel
*>
subgraph_kernels
;
size_t
sub_cnt
{
0
};
for
(
auto
temp_kernels
:
sub_kernels_list
)
{
kernel
::
KERNEL_ARCH
arch
=
temp_kernels
.
front
()
->
desc
().
arch
;
if
(
arch
==
kernel
::
KERNEL_ARCH
::
kCPU
)
{
...
...
@@ -194,7 +197,10 @@ void Scheduler::ConstructSubgraphs(std::vector<kernel::LiteKernel *> *kernels) {
}
else
{
auto
subgraph_kernel
=
CreateSubKernel
(
temp_kernels
,
arch
);
subgraph_kernels
.
emplace_back
(
subgraph_kernel
);
std
::
string
arch_name
=
(
arch
==
kernel
::
KERNEL_ARCH
::
kGPU
)
?
"GPU"
:
"NPU"
;
MS_LOG
(
INFO
)
<<
arch_name
<<
" subgraph id"
<<
sub_cnt
<<
" created."
;
}
++
sub_cnt
;
}
kernels
->
clear
();
kernels
->
insert
(
kernels
->
begin
(),
subgraph_kernels
.
begin
(),
subgraph_kernels
.
end
());
...
...
@@ -223,7 +229,7 @@ kernel::LiteKernel *Scheduler::CreateSubKernel(const std::vector<kernel::LiteKer
kernel
::
LiteKernel
*
Scheduler
::
ScheduleNode
(
const
std
::
vector
<
tensor
::
Tensor
*>
&
in_tensors
,
const
std
::
vector
<
tensor
::
Tensor
*>
&
out_tensors
,
const
mindspore
::
lite
::
PrimitiveC
*
primitive
)
{
const
mindspore
::
lite
::
PrimitiveC
*
primitive
,
const
schema
::
CNode
*
cnode
)
{
MS_ASSERT
(
nullptr
!=
primitive
);
auto
data_type
=
in_tensors
.
front
()
->
data_type
();
kernel
::
KernelKey
desc
{
kernel
::
KERNEL_ARCH
::
kCPU
,
data_type
,
static_cast
<
schema
::
PrimitiveType
>
(
primitive
->
Type
())};
...
...
@@ -233,6 +239,10 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
if
(
nullptr
!=
kernel
)
{
kernel
->
set_desc
(
desc
);
return
kernel
;
}
else
{
MS_LOG
(
ERROR
)
<<
"Not supported GPU Op "
<<
schema
::
EnumNamePrimitiveType
(
static_cast
<
schema
::
PrimitiveType
>
(
primitive
->
Type
()))
<<
" "
<<
(
cnode
->
name
()
->
str
());
}
}
...
...
mindspore/lite/src/scheduler.h
浏览文件 @
5ffb9588
...
...
@@ -35,7 +35,8 @@ class Scheduler {
protected:
kernel
::
LiteKernel
*
ScheduleNode
(
const
std
::
vector
<
tensor
::
Tensor
*>
&
in_tensors
,
const
std
::
vector
<
tensor
::
Tensor
*>
&
out_tensors
,
const
mindspore
::
lite
::
PrimitiveC
*
primitive
);
const
mindspore
::
lite
::
PrimitiveC
*
primitive
,
const
schema
::
CNode
*
cnode
);
private:
int
InitOp2Kernel
(
const
lite
::
Model
*
model
,
std
::
vector
<
tensor
::
Tensor
*>
*
tensors
,
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
浏览文件 @
5ffb9588
...
...
@@ -208,7 +208,7 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) {
float
gnd_data
[]
=
{
3.3848767
,
1.4446403
,
1.8428744
,
1.3194335
,
2.5873442
,
2.1384869
,
2.04022
,
1.1872686
,
2.2294958
,
1.6570128
,
2.465089
,
1.4294086
,
2.7941442
,
1.7871612
,
2.188921
,
1.0601988
};
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
get
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
release
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
}
TEST_F
(
TestConvolutionDwOpenCL
,
PadNC4HW4Fp32
)
{
...
...
@@ -280,7 +280,7 @@ TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
0.8749627
,
0.8953936
,
0.5093431
,
1.5496738
,
0.54936385
,
0.7683113
,
1.165742
,
1.3682933
,
1.0517888
,
0.59817517
,
0.75649744
,
1.2075498
,
0.38804203
};
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
get
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
release
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
}
TEST_F
(
TestConvolutionDwOpenCL
,
NoPadNHWC4Fp32
)
{
...
...
@@ -325,7 +325,8 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
float
gnd_data
[]
=
{
3.3848767
,
1.4446403
,
1.8428744
,
1.3194335
,
2.5873442
,
2.1384869
,
2.04022
,
1.1872686
,
2.2294958
,
1.6570128
,
2.465089
,
1.4294086
,
2.7941442
,
1.7871612
,
2.188921
,
1.0601988
};
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
get
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
release
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
// delete conv_param;
}
TEST_F
(
TestConvolutionDwOpenCL
,
PadNHWC4Fp32
)
{
...
...
@@ -397,7 +398,7 @@ TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
0.8749627
,
0.8953936
,
0.5093431
,
1.5496738
,
0.54936385
,
0.7683113
,
1.165742
,
1.3682933
,
1.0517888
,
0.59817517
,
0.75649744
,
1.2075498
,
0.38804203
};
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
get
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
DepthWiseTestMain
<
float
,
float
>
(
conv_param
.
release
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
}
TEST_F
(
TestConvolutionDwOpenCL
,
NoPadNHWC4Fp16
)
{
...
...
@@ -444,7 +445,7 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp16) {
2.2294958
,
1.6570128
,
2.465089
,
1.4294086
,
2.7941442
,
1.7871612
,
2.188921
,
1.0601988
};
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
()
->
SetFp16Enable
(
true
);
DepthWiseTestMain
<
float16_t
,
float16_t
>
(
conv_param
.
get
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
,
DepthWiseTestMain
<
float16_t
,
float16_t
>
(
conv_param
.
release
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
,
kNumberTypeFloat16
,
true
,
1e-2
);
}
...
...
@@ -518,7 +519,7 @@ TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp16) {
1.0517888
,
0.59817517
,
0.75649744
,
1.2075498
,
0.38804203
};
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
()
->
SetFp16Enable
(
true
);
DepthWiseTestMain
<
float16_t
,
float16_t
>
(
conv_param
.
get
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
,
DepthWiseTestMain
<
float16_t
,
float16_t
>
(
conv_param
.
release
(),
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
,
kNumberTypeFloat16
,
true
,
1e-2
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录