Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
05529f39
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
05529f39
编写于
8月 17, 2020
作者:
W
wangdongxu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
opencl convolution kernel support winograd
上级
52e2d925
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed files
with
572 additions
and
181 deletions
+572
-181
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
...pore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
+468
-126
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h
...spore/lite/src/runtime/kernel/opencl/kernel/convolution.h
+50
-3
mindspore/lite/src/runtime/opencl/opencl_allocator.cc
mindspore/lite/src/runtime/opencl/opencl_allocator.cc
+1
-0
mindspore/lite/src/runtime/opencl/opencl_runtime.cc
mindspore/lite/src/runtime/opencl/opencl_runtime.cc
+7
-0
mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
...te/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
+46
-52
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
浏览文件 @
05529f39
此差异已折叠。
点击以展开。
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h
浏览文件 @
05529f39
...
@@ -40,12 +40,59 @@ class ConvolutionOpenCLKernel : public OpenCLKernel {
...
@@ -40,12 +40,59 @@ class ConvolutionOpenCLKernel : public OpenCLKernel {
int
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
override
;
int
GetImageSize
(
size_t
idx
,
std
::
vector
<
size_t
>
*
img_size
)
override
;
private:
private:
int
CI_SLICES
;
int
CO_SLICES
;
float
*
packed_weight_
=
nullptr
;
float
*
packed_weight_
=
nullptr
;
float
*
packed_bias_
=
nullptr
;
float
*
packed_bias_
=
nullptr
;
cl
::
Kernel
kernel_
;
std
::
string
CodeGen
();
bool
use_winograd_
=
false
;
int
GetGlobalLocal
(
std
::
vector
<
size_t
>
*
global
,
std
::
vector
<
size_t
>
*
local
);
int
TILES_X
;
int
TILES_Y
;
int
TILES_XY
;
void
*
winograd_mem0_
=
nullptr
;
void
*
winograd_mem1_
=
nullptr
;
cl
::
Kernel
kernel_4x4to36
;
cl
::
Kernel
kernel_conv
;
cl
::
Kernel
kernel_36to4x4
;
std
::
string
CodeGenConvolution
();
std
::
string
CodeGenWinograd4x4To36
();
std
::
string
CodeGenWinogradConvolution
();
std
::
string
CodeGenWinograd36To4x4
();
int
SetGlobalLocalConv
(
std
::
vector
<
size_t
>
*
global
,
std
::
vector
<
size_t
>
*
local
);
// Returns true only when all three groups of conditions below hold:
//   1. convolution attributes: 3x3 kernel, dilation 1x1, stride 1x1;
//   2. channel slices: CO_SLICES is a multiple of 4 and both CI_SLICES and CO_SLICES are >= 16;
//   3. tile count: TILES_X * TILES_Y >= 32.
// NOTE(review): presumably the result selects the Winograd code path (cf. use_winograd_) — confirm against the caller.
bool UseWinograd4x4To6x6() {
  const auto *conv = reinterpret_cast<ConvParameter *>(op_parameter_);

  // Attribute check: any deviation from 3x3 / unit dilation / unit stride disqualifies.
  const bool is_3x3_kernel = conv->kernel_h_ == 3 && conv->kernel_w_ == 3;
  const bool unit_dilation = conv->dilation_h_ == 1 && conv->dilation_w_ == 1;
  const bool unit_stride = conv->stride_h_ == 1 && conv->stride_w_ == 1;
  if (!(is_3x3_kernel && unit_dilation && unit_stride)) {
    return false;
  }

  // Channel check on the slice counts held by this kernel object.
  if (CO_SLICES % 4 != 0 || CI_SLICES < 16 || CO_SLICES < 16) {
    return false;
  }

  // Spatial check: require at least 32 tiles in total.
  return TILES_X * TILES_Y >= 32;
}
// Multiplies two dense matrices stored row-major in flat vectors.
//   A: M x N, element (i, k) at A[i * N + k]
//   B: N x K, element (k, j) at B[k * K + j]
// Returns the M x K product, row-major (element (i, j) at C[i * K + j]).
// Returns an empty vector when any dimension is negative or when A or B holds
// fewer elements than the given dimensions require, instead of reading out of bounds.
std::vector<float> MatrixMultiply(const std::vector<float> &A, const std::vector<float> &B, int M, int N, int K) {
  if (M < 0 || N < 0 || K < 0) {
    return {};
  }
  // Index arithmetic is done in size_t so large dimensions cannot overflow int.
  const size_t rows = static_cast<size_t>(M);
  const size_t inner = static_cast<size_t>(N);
  const size_t cols = static_cast<size_t>(K);
  if (A.size() < rows * inner || B.size() < inner * cols) {
    return {};
  }

  std::vector<float> C(rows * cols);
  for (size_t i = 0; i < rows; ++i) {
    for (size_t j = 0; j < cols; ++j) {
      // Accumulate in the same k-ascending order as before so results are bit-identical for valid input.
      float s = 0.0f;
      for (size_t k = 0; k < inner; ++k) {
        s += A[i * inner + k] * B[k * cols + j];
      }
      C[i * cols + j] = s;
    }
  }
  return C;
}
// Returns the largest integer i in [1, y] that divides x evenly.
// Falls back to 1 when y is not positive. The previous `i != 0` condition made the
// loop walk away from zero for negative y, yielding a negative result or
// eventually evaluating an undefined remainder.
static int GetBiggestDivider(int x, int y) {
  for (int i = y; i > 0; --i) {
    if (x % i == 0) {
      return i;
    }
  }
  return 1;
}
};
};
}
// namespace mindspore::kernel
}
// namespace mindspore::kernel
...
...
mindspore/lite/src/runtime/opencl/opencl_allocator.cc
浏览文件 @
05529f39
...
@@ -113,6 +113,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
...
@@ -113,6 +113,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
UnLock
();
UnLock
();
return
nullptr
;
return
nullptr
;
}
}
MS_LOG
(
DEBUG
)
<<
"Malloc a new Image2D, width="
<<
img_size
[
0
]
<<
", height="
<<
img_size
[
1
];
image_ptr
=
static_cast
<
void
*>
(
image
);
image_ptr
=
static_cast
<
void
*>
(
image
);
}
}
}
}
...
...
mindspore/lite/src/runtime/opencl/opencl_runtime.cc
浏览文件 @
05529f39
...
@@ -71,6 +71,10 @@ void OpenCLRuntime::DeleteInstance() {
...
@@ -71,6 +71,10 @@ void OpenCLRuntime::DeleteInstance() {
OpenCLRuntime
::
OpenCLRuntime
()
{
default_build_opts_
=
" -cl-mad-enable -cl-fast-relaxed-math -Werror"
;
}
OpenCLRuntime
::
OpenCLRuntime
()
{
default_build_opts_
=
" -cl-mad-enable -cl-fast-relaxed-math -Werror"
;
}
// Writes `length` bytes starting at `buffer` to stdout.
// `final` and `user_data` are accepted but not used.
// NOTE(review): the only visible reference is the commented-out CL_PRINTF_CALLBACK_ARM
// context property in OpenCLRuntime::Init — confirm whether this is still needed.
void printf_callback(const char *buffer, size_t length, size_t final, void *user_data) {
  (void)final;
  (void)user_data;
  fwrite(buffer, 1, length, stdout);
}
// Init will get platforms info, get devices info, create opencl context.
// Init will get platforms info, get devices info, create opencl context.
int
OpenCLRuntime
::
Init
()
{
int
OpenCLRuntime
::
Init
()
{
std
::
unique_lock
<
std
::
mutex
>
lck
(
g_init_mtx
);
std
::
unique_lock
<
std
::
mutex
>
lck
(
g_init_mtx
);
...
@@ -147,6 +151,9 @@ int OpenCLRuntime::Init() {
...
@@ -147,6 +151,9 @@ int OpenCLRuntime::Init() {
}
}
#else
#else
MS_LOG
(
INFO
)
<<
"Create common opencl context"
;
MS_LOG
(
INFO
)
<<
"Create common opencl context"
;
// cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
// CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
// context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
context_
=
std
::
make_shared
<
cl
::
Context
>
(
std
::
vector
<
cl
::
Device
>
{
*
device_
},
nullptr
,
nullptr
,
nullptr
,
&
err
);
context_
=
std
::
make_shared
<
cl
::
Context
>
(
std
::
vector
<
cl
::
Device
>
{
*
device_
},
nullptr
,
nullptr
,
nullptr
,
&
err
);
#endif
#endif
if
(
err
!=
CL_SUCCESS
)
{
if
(
err
!=
CL_SUCCESS
)
{
...
...
mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
浏览文件 @
05529f39
...
@@ -63,9 +63,26 @@ void MyCompareOutput(lite::tensor::Tensor *output_tensor, const std::string &fil
...
@@ -63,9 +63,26 @@ void MyCompareOutput(lite::tensor::Tensor *output_tensor, const std::string &fil
printf
(
"compare success!
\n\n\n
"
);
printf
(
"compare success!
\n\n\n
"
);
}
}
void
TEST_MAIN
(
ConvParameter
*
param
,
schema
::
Format
data_format
,
const
std
::
string
&
input_file
,
void
TEST_MAIN
(
schema
::
Format
input_format
,
schema
::
Format
output_format
,
const
std
::
string
&
data_path
,
const
std
::
string
&
weight_file
,
const
std
::
string
&
bias_file
,
const
std
::
string
&
expect_file
)
{
std
::
string
attr_str
)
{
assert
(
data_format
==
schema
::
Format_NHWC
||
data_format
==
schema
::
Format_NHWC4
);
assert
(
data_format
==
schema
::
Format_NHWC
||
data_format
==
schema
::
Format_NHWC4
);
auto
param
=
new
ConvParameter
;
sscanf
(
attr_str
.
c_str
(),
"inputNHWC_%dx%dx%dx%d_outputNHWC_%dx%dx%dx%d_kernelHW_%dx%d_strideHW_%dx%d_padTopBottomLeftRight_%dx%dx%dx%d_"
"dilationHW_%dx%d"
,
&
param
->
input_batch_
,
&
param
->
input_h_
,
&
param
->
input_w_
,
&
param
->
input_channel_
,
&
param
->
output_batch_
,
&
param
->
output_h_
,
&
param
->
output_w_
,
&
param
->
output_channel_
,
&
param
->
kernel_h_
,
&
param
->
kernel_w_
,
&
param
->
stride_h_
,
&
param
->
stride_w_
,
&
param
->
pad_u_
,
&
param
->
pad_d_
,
&
param
->
pad_l_
,
&
param
->
pad_r_
,
&
param
->
dilation_h_
,
&
param
->
dilation_w_
);
auto
testcase_path
=
data_path
+
"/"
+
attr_str
+
"/"
;
auto
input_file
=
testcase_path
+
(
input_format
==
schema
::
Format_NHWC4
?
"input_NHWC4.bin"
:
"input_NHWC.bin"
);
auto
weight_file
=
testcase_path
+
"weight_OHWI.bin"
;
auto
bias_file
=
testcase_path
+
"bias_C4.bin"
;
auto
expect_file
=
testcase_path
+
(
output_format
==
schema
::
Format_NHWC4
?
"expect_NHWC4.bin"
:
"expect_NHWC.bin"
);
std
::
cout
<<
input_file
<<
std
::
endl
;
std
::
cout
<<
weight_file
<<
std
::
endl
;
std
::
cout
<<
bias_file
<<
std
::
endl
;
std
::
cout
<<
expect_file
<<
std
::
endl
;
std
::
cout
<<
"initialize OpenCLRuntime"
;
std
::
cout
<<
"initialize OpenCLRuntime"
;
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
...
@@ -79,10 +96,10 @@ void TEST_MAIN(ConvParameter *param, schema::Format data_format, const std::stri
...
@@ -79,10 +96,10 @@ void TEST_MAIN(ConvParameter *param, schema::Format data_format, const std::stri
std
::
vector
<
int
>
output_shape
=
{
param
->
output_batch_
,
param
->
output_h_
,
param
->
output_w_
,
param
->
output_channel_
};
std
::
vector
<
int
>
output_shape
=
{
param
->
output_batch_
,
param
->
output_h_
,
param
->
output_w_
,
param
->
output_channel_
};
auto
data_type
=
kNumberTypeFloat32
;
auto
data_type
=
kNumberTypeFloat32
;
auto
tensorType
=
schema
::
NodeType_ValueNode
;
auto
tensorType
=
schema
::
NodeType_ValueNode
;
auto
input_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
data
_format
,
tensorType
);
auto
input_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
input_shape
,
input
_format
,
tensorType
);
auto
weight_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
weight_shape
,
schema
::
Format_KHWC
,
tensorType
);
auto
weight_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
weight_shape
,
schema
::
Format_KHWC
,
tensorType
);
auto
bias_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
bias_shape
,
schema
::
Format_KHWC
,
tensorType
);
auto
bias_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
bias_shape
,
schema
::
Format_KHWC
,
tensorType
);
auto
output_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
data
_format
,
tensorType
);
auto
output_tensor
=
new
lite
::
tensor
::
Tensor
(
data_type
,
output_shape
,
output
_format
,
tensorType
);
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
input_tensor
,
weight_tensor
,
bias_tensor
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
input_tensor
,
weight_tensor
,
bias_tensor
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
output_tensor
};
...
@@ -114,7 +131,6 @@ void TEST_MAIN(ConvParameter *param, schema::Format data_format, const std::stri
...
@@ -114,7 +131,6 @@ void TEST_MAIN(ConvParameter *param, schema::Format data_format, const std::stri
std
::
cout
<<
"sub_graph->Run()"
;
std
::
cout
<<
"sub_graph->Run()"
;
sub_graph
->
Run
();
sub_graph
->
Run
();
printf
(
"output_tensor->Size() =%zu
\n
"
,
output_tensor
->
Size
());
std
::
cout
<<
"compare result"
;
std
::
cout
<<
"compare result"
;
MyCompareOutput
(
output_tensor
,
expect_file
);
MyCompareOutput
(
output_tensor
,
expect_file
);
...
@@ -131,57 +147,35 @@ void TEST_MAIN(ConvParameter *param, schema::Format data_format, const std::stri
...
@@ -131,57 +147,35 @@ void TEST_MAIN(ConvParameter *param, schema::Format data_format, const std::stri
mindspore
::
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
mindspore
::
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
std
::
array
<
std
::
string
,
4
>
GenFilenames
(
ConvParameter
*
param
,
schema
::
Format
data_format
,
const
std
::
string
&
path
)
{
TEST_F
(
TestConvolutionOpenCL
,
in1x224x224x3_out1x112x112x32_k33_s22_p0101
)
{
auto
full_path
=
path
+
"inputNHWC_"
+
std
::
to_string
(
param
->
input_batch_
)
+
"x"
+
std
::
to_string
(
param
->
input_h_
)
+
TEST_MAIN
(
"x"
+
std
::
to_string
(
param
->
input_w_
)
+
"x"
+
std
::
to_string
(
param
->
input_channel_
)
+
schema
::
Format_NHWC
,
schema
::
Format_NHWC4
,
"testcases/mobilenetv2_fp32/"
,
"_outputNHWC_"
+
std
::
to_string
(
param
->
output_batch_
)
+
"x"
+
std
::
to_string
(
param
->
output_h_
)
+
"inputNHWC_1x224x224x3_outputNHWC_1x112x112x32_kernelHW_3x3_strideHW_2x2_padTopBottomLeftRight_0x1x0x1_dilationHW_"
"x"
+
std
::
to_string
(
param
->
output_w_
)
+
"x"
+
std
::
to_string
(
param
->
output_channel_
)
+
"1x1"
);
"_kernelHW_"
+
std
::
to_string
(
param
->
kernel_h_
)
+
"x"
+
std
::
to_string
(
param
->
kernel_w_
)
+
"_strideHW_"
+
std
::
to_string
(
param
->
stride_h_
)
+
"x"
+
std
::
to_string
(
param
->
stride_w_
)
+
"_padTopBottomLeftRight_"
+
std
::
to_string
(
param
->
pad_u_
)
+
"x"
+
std
::
to_string
(
param
->
pad_d_
)
+
"x"
+
std
::
to_string
(
param
->
pad_l_
)
+
"x"
+
std
::
to_string
(
param
->
pad_r_
)
+
"_dilationHW_1x1/"
;
if
(
data_format
==
schema
::
Format_NHWC4
)
{
return
std
::
array
<
std
::
string
,
4
>
{
full_path
+
"input_NHWC4.bin"
,
full_path
+
"weight_OHWI.bin"
,
full_path
+
"bias_C4.bin"
,
full_path
+
"expect_NHWC4.bin"
};
}
else
{
return
std
::
array
<
std
::
string
,
4
>
{
full_path
+
"input_NHWC.bin"
,
full_path
+
"weight_OHWI.bin"
,
full_path
+
"bias_C.bin"
,
full_path
+
"expect_NHWC.bin"
};
}
}
}
TEST_F
(
TestConvolutionOpenCL
,
in1x224x224x3_out1x112x112x32_k33_s22_p0101
)
{
// TEST_F(TestConvolutionOpenCL, in1x1x64x512_out1x1x64x7358_k11_s11_p0000) {
auto
param
=
new
ConvParameter
;
// TEST_MAIN(
param
->
input_batch_
=
1
,
param
->
input_h_
=
224
,
param
->
input_w_
=
224
,
param
->
input_channel_
=
3
;
// schema::Format_NHWC, schema::Format_NHWC4, "testcases/02_fp32/",
param
->
output_batch_
=
1
,
param
->
output_h_
=
112
,
param
->
output_w_
=
112
,
param
->
output_channel_
=
32
;
// "inputNHWC_1x1x64x512_outputNHWC_1x1x64x7358_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_"
param
->
kernel_h_
=
3
,
param
->
kernel_w_
=
3
;
// "1x1");
param
->
stride_h_
=
2
,
param
->
stride_w_
=
2
;
//}
param
->
pad_u_
=
0
,
param
->
pad_d_
=
1
,
param
->
pad_l_
=
0
,
param
->
pad_r_
=
1
;
TEST_F
(
TestConvolutionOpenCL
,
winograd_inputNHWC_1x16x256x96_outputNHWC_1x16x256x80
)
{
auto
filenames
=
GenFilenames
(
param
,
schema
::
Format_NHWC4
,
"testcases/mobilenetv2_fp32/"
);
TEST_MAIN
(
schema
::
Format_NHWC
,
schema
::
Format_NHWC4
,
"testcases/test_fp32/"
,
// std::cout << filenames[0] << std::endl;
"inputNHWC_1x16x256x96_outputNHWC_1x16x256x80_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_1x1x1x1_"
// std::cout << filenames[1] << std::endl;
"dilationHW_1x1"
);
// std::cout << filenames[2] << std::endl;
}
// std::cout << filenames[3] << std::endl;
TEST_F
(
TestConvolutionOpenCL
,
winograd_inputNHWC_1x16x256x100_outputNHWC_1x16x256x96
)
{
TEST_MAIN
(
param
,
schema
::
Format_NHWC4
,
filenames
[
0
],
filenames
[
1
],
filenames
[
2
],
filenames
[
3
]);
TEST_MAIN
(
schema
::
Format_NHWC
,
schema
::
Format_NHWC4
,
"testcases/test_fp32/"
,
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
"inputNHWC_1x16x256x100_outputNHWC_1x16x256x96_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_1x1x1x1_"
"dilationHW_1x1"
);
}
}
TEST_F
(
TestConvolutionOpenCL
,
in1x1x64x512_out1x1x64x7358_k11_s11_p0000
)
{
TEST_F
(
TestConvolutionOpenCL
,
winograd_inputNHWC_1x480x480x128_outputNHWC_1x480x480x128
)
{
auto
param
=
new
ConvParameter
;
TEST_MAIN
(
schema
::
Format_NHWC
,
schema
::
Format_NHWC4
,
"testcases/test_fp32/"
,
param
->
input_batch_
=
1
,
param
->
input_h_
=
1
,
param
->
input_w_
=
64
,
param
->
input_channel_
=
512
;
"inputNHWC_1x480x480x128_outputNHWC_1x480x480x128_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_"
param
->
output_batch_
=
1
,
param
->
output_h_
=
1
,
param
->
output_w_
=
64
,
param
->
output_channel_
=
7358
;
"1x1x1x1_dilationHW_1x1"
);
param
->
kernel_h_
=
1
,
param
->
kernel_w_
=
1
;
param
->
stride_h_
=
1
,
param
->
stride_w_
=
1
;
param
->
pad_u_
=
0
,
param
->
pad_d_
=
0
,
param
->
pad_l_
=
0
,
param
->
pad_r_
=
0
;
auto
filenames
=
GenFilenames
(
param
,
schema
::
Format_NHWC4
,
"testcases/02_fp32/"
);
// std::cout << filenames[0] << std::endl;
// std::cout << filenames[1] << std::endl;
// std::cout << filenames[2] << std::endl;
// std::cout << filenames[3] << std::endl;
TEST_MAIN
(
param
,
schema
::
Format_NHWC4
,
filenames
[
0
],
filenames
[
1
],
filenames
[
2
],
filenames
[
3
]);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
}
// namespace mindspore
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录