Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
12847919
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
12847919
编写于
8月 20, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 20, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4770 [MS][LITE][Develop] fix codex for opencl depthwise
Merge pull request !4770 from wandongdong/master
上级
2e936a27
877b4a97
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
179 addition
and
251 deletion
+179
-251
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
...e/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
+2
-4
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
...lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
+0
-1
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
.../lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
+13
-11
mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
...st/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
+164
-235
未找到文件。
mindspore/lite/src/runtime/kernel/opencl/cl/fp32/arithmetic_image2d.cl
浏览文件 @
12847919
#
define
divide_no_check
(
a,
b
)
(
a/b
)
__constant
sampler_t
smp_none
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_NONE |
CLK_FILTER_NEAREST
;
__kernel
void
ElementAdd
(
__read_only
image2d_t
input_a,
__read_only
image2d_t
input_b,
__write_only
image2d_t
output,
...
...
@@ -49,10 +50,7 @@ __kernel void ElementDiv(__read_only image2d_t input_a, __read_only image2d_t in
float4 a = read_imagef(input_a, smp_none, (int2)(X, Y));
float4 b = read_imagef(input_b, smp_none, (int2)(X, Y));
if (b == 0) {
return;
}
write_imagef(output, (int2)(X, Y), a / b);
write_imagef(output, (int2)(X, Y), divide_no_check(a, b));
}
__kernel void BoardcastArith(__read_only image2d_t input_a, float weight, float bias, __write_only image2d_t output,
...
...
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
浏览文件 @
12847919
...
...
@@ -102,7 +102,6 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() {
allocator
->
UnmapBuffer
(
packed_weight_
);
// init bias
if
(
in_tensors_
.
size
()
==
kInputSize2
)
{
bias_data_
=
reinterpret_cast
<
FLOAT_t
*>
(
allocator
->
Malloc
(
C4NUM
*
CO4
*
sizeof
(
FLOAT_t
)));
bias_data_
=
reinterpret_cast
<
FLOAT_t
*>
(
allocator
->
MapBuffer
(
bias_data_
,
CL_MAP_WRITE
,
nullptr
,
true
));
...
...
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
浏览文件 @
12847919
...
...
@@ -15,7 +15,6 @@
*/
#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include <set>
#include "src/runtime/opencl/opencl_executor.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/utils.h"
...
...
@@ -92,6 +91,8 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
MS_ASSERT
(
parameter
);
if
(
parameter
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"SubGraphOpenCLKernel new parameter failed!"
;
delete
new_tensor
;
new_tensor
=
nullptr
;
return
RET_ERROR
;
}
parameter
->
src_format
=
src_format
;
...
...
@@ -109,6 +110,10 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
MS_ASSERT
(
in_convert_op
);
if
(
in_convert_op
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"SubGraphOpenCLKernel create op failed!"
;
delete
new_tensor
;
new_tensor
=
nullptr
;
delete
parameter
;
parameter
=
nullptr
;
return
RET_ERROR
;
}
auto
in_opencl_op
=
reinterpret_cast
<
OpenCLKernel
*>
(
in_convert_op
);
...
...
@@ -272,16 +277,16 @@ int SubGraphOpenCLKernel::UnInit() {
delete
tensor
;
}
}
for
(
const
auto
parameter
:
in_parameters_
)
{
if
(
parameter
!=
nullptr
)
{
delete
parameter
;
}
}
for
(
const
auto
op
:
in_convert_ops_
)
{
if
(
op
!=
nullptr
)
{
delete
op
;
}
}
for
(
const
auto
parameter
:
in_parameters_
)
{
if
(
parameter
!=
nullptr
)
{
delete
parameter
;
}
}
return
RET_OK
;
}
...
...
@@ -290,18 +295,15 @@ int SubGraphOpenCLKernel::InferShape() { return RET_OK; }
int
SubGraphOpenCLKernel
::
ReSize
()
{
return
RET_OK
;
}
int
SubGraphOpenCLKernel
::
Run
()
{
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
for
(
auto
&
tensor
:
in_tensors_
)
{
allocator_
->
UnmapBuffer
(
tensor
->
Data
());
}
lite
::
opencl
::
OpenCLExecutor
executor
;
executor
.
Run
(
in_tensors_
,
out_tensors_
,
nodes_
,
allocator_
);
ocl_runtime
->
SyncCommandQueue
();
for
(
auto
&
tensor
:
out_tensors_
)
{
void
*
data
=
allocator_
->
MapBuffer
(
tensor
->
Data
(),
CL_MAP_READ
,
nullptr
,
true
);
tensor
->
SetData
(
data
);
}
return
RET_OK
;
}
}
// namespace mindspore::kernel
mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
100755 → 100644
浏览文件 @
12847919
...
...
@@ -23,24 +23,12 @@
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h"
#define SAFE_DELETE_ARRAY(a) \
if
(
a
!=
nullptr
)
{
\
delete
[]
a
;
\
a
=
nullptr
;
\
}
#define SAFE_DELETE_PTR(a) \
if
(
a
!=
nullptr
)
{
\
delete
a
;
\
a
=
nullptr
;
\
}
bool
IMAGE2D_OPEN
=
true
;
namespace
mindspore
{
class
TestConvolutionDwOpenCL
:
public
mindspore
::
CommonTest
{
public:
TestConvolutionDwOpenCL
(){}
TestConvolutionDwOpenCL
()
{}
};
void
DepthWiseTestMain
(
ConvParameter
*
conv_param
,
float_t
*
input_data
,
float_t
*
weight_data
,
float_t
*
gnd_data
,
...
...
@@ -52,13 +40,16 @@ void DepthWiseTestMain(ConvParameter *conv_param, float_t *input_data, float_t *
// pack input
int
IC4
=
UP_DIV
(
conv_param
->
input_channel_
,
C4NUM
);
int
pack_input_size
=
C4NUM
*
IC4
*
conv_param
->
input_h_
*
conv_param
->
input_w_
;
float
*
packed_input
=
new
float
[
pack_input_size
];
memset
(
packed_input
,
0
,
pack_input_size
*
sizeof
(
float
));
auto
packed_input
=
std
::
make_unique
<
float
>
(
pack_input_size
);
if
(
packed_input
.
get
()
==
nullptr
)
{
return
;
}
memset
(
packed_input
.
get
(),
0
,
pack_input_size
*
sizeof
(
float
));
int
plane
=
conv_param
->
input_w_
*
conv_param
->
input_h_
;
if
(
format
==
schema
::
Format_NHWC4
)
{
PackNHWCToNHWC4Fp32
(
input_data
,
packed_input
,
1
,
plane
,
conv_param
->
input_channel_
);
PackNHWCToNHWC4Fp32
(
input_data
,
packed_input
.
get
()
,
1
,
plane
,
conv_param
->
input_channel_
);
}
else
{
PackNHWCToNC4HW4Fp32
(
input_data
,
packed_input
,
1
,
plane
,
conv_param
->
input_channel_
);
PackNHWCToNC4HW4Fp32
(
input_data
,
packed_input
.
get
()
,
1
,
plane
,
conv_param
->
input_channel_
);
}
// pack weight
...
...
@@ -77,61 +68,62 @@ void DepthWiseTestMain(ConvParameter *conv_param, float_t *input_data, float_t *
std
::
vector
<
int
>
shape_bias
=
{
conv_param
->
output_channel_
};
std
::
vector
<
int
>
shape_out
=
{
conv_param
->
output_batch_
,
conv_param
->
output_h_
,
conv_param
->
output_w_
,
conv_param
->
output_channel_
};
lite
::
tensor
::
Tensor
*
tensor_a
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_in
,
format
);
// Note!!!actual is NHWC4
lite
::
tensor
::
Tensor
*
tensor_b
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_filter
,
schema
::
Format_NHWC
);
lite
::
tensor
::
Tensor
*
tensor_c
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_bias
,
schema
::
Format_NHWC
);
lite
::
tensor
::
Tensor
*
tensor_d
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_out
,
format
);
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
tensor_a
,
tensor_b
,
tensor_c
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
tensor_d
};
auto
tensor_a
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_in
,
format
);
// Note!!!actual is NHWC4
auto
tensor_b
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_filter
,
schema
::
Format_NHWC
);
auto
tensor_c
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_bias
,
schema
::
Format_NHWC
);
auto
tensor_d
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_out
,
format
);
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
tensor_a
.
get
(),
tensor_b
.
get
(),
tensor_c
.
get
()};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
tensor_d
.
get
()};
if
(
tensor_a
.
get
()
==
nullptr
||
tensor_b
.
get
()
==
nullptr
||
tensor_c
.
get
()
==
nullptr
||
tensor_d
.
get
()
==
nullptr
)
{
return
;
}
// freamework to do!!!
inputs
[
1
]
->
SetData
(
packed_weight
);
inputs
[
2
]
->
SetData
(
bias_data
);
OpParameter
*
parameter
=
reinterpret_cast
<
OpParameter
*>
(
conv_param
);
auto
*
pKernel
=
new
kernel
::
DepthwiseConv2dOpenCLKernel
(
parameter
,
inputs
,
outputs
);
OpParameter
*
parameter
=
reinterpret_cast
<
OpParameter
*>
(
conv_param
);
auto
pKernel
=
std
::
make_unique
<
kernel
::
DepthwiseConv2dOpenCLKernel
>
(
parameter
,
inputs
,
outputs
);
if
(
pKernel
.
get
()
==
nullptr
)
{
return
;
}
pKernel
->
Init
();
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
pKernel
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
{
tensor_a
};
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
pKernel
.
get
()
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
{
tensor_a
.
get
()
};
size_t
C4
=
UP_DIV
(
inputs
[
0
]
->
Channel
(),
C4NUM
);
// if (IMAGE2D_OPEN && format == schema::Format_NHWC4) {
// std::vector<size_t> img_size{inputs[0]->Width() * C4, (size_t)inputs[0]->Height(), CL_FLOAT};
// auto in_data = allocator->Malloc(inputs[0]->Size(), img_size);
// inputs[0]->SetData(in_data);
// } else if (IMAGE2D_OPEN && format == schema::Format_NC4HW4) {
// std::vector<size_t> img_size{(size_t)inputs[0]->Width(), inputs[0]->Height() * C4, CL_FLOAT};
// auto in_data = allocator->Malloc(inputs[0]->Size(), img_size);
// inputs[0]->SetData(in_data);
// } else {
inputs
[
0
]
->
MallocData
(
allocator
);
// }
auto
*
pGraph
=
new
kernel
::
SubGraphOpenCLKernel
(
inputs_
,
outputs
,
kernels
,
kernels
,
kernels
);
auto
pGraph
=
std
::
make_unique
<
kernel
::
SubGraphOpenCLKernel
>
(
inputs_
,
outputs
,
kernels
,
kernels
,
kernels
);
if
(
pKernel
.
get
()
==
nullptr
)
{
return
;
}
pGraph
->
Init
();
// freamework to do!!!
memcpy
(
inputs
[
0
]
->
Data
(),
packed_input
,
sizeof
(
float
)
*
pack_input_size
);
memcpy
(
inputs
[
0
]
->
Data
(),
packed_input
.
get
()
,
sizeof
(
float
)
*
pack_input_size
);
pGraph
->
Run
();
if
(
is_compare
)
{
float_t
*
packed_output
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
Data
());
float_t
*
packed_correct_data
=
new
float_t
[
packed_output_size
];
memset
(
packed_correct_data
,
0
,
packed_output_size
*
sizeof
(
float_t
));
float_t
*
packed_output
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
Data
());
auto
packed_correct_data
=
std
::
make_unique
<
float_t
>
(
packed_output_size
);
if
(
packed_correct_data
)
{
return
;
}
memset
(
packed_correct_data
.
get
(),
0
,
packed_output_size
*
sizeof
(
float_t
));
if
(
format
==
schema
::
Format_NC4HW4
)
{
PackNHWCToNC4HW4Fp32
(
gnd_data
,
packed_correct_data
,
conv_param
->
output_batch_
,
PackNHWCToNC4HW4Fp32
(
gnd_data
,
packed_correct_data
.
get
()
,
conv_param
->
output_batch_
,
conv_param
->
output_h_
*
conv_param
->
output_w_
,
conv_param
->
output_channel_
);
}
else
{
PackNHWCToNHWC4Fp32
(
gnd_data
,
packed_correct_data
,
conv_param
->
output_batch_
,
PackNHWCToNHWC4Fp32
(
gnd_data
,
packed_correct_data
.
get
()
,
conv_param
->
output_batch_
,
conv_param
->
output_h_
*
conv_param
->
output_w_
,
conv_param
->
output_channel_
);
}
printf
(
"==================input_data=================
\n
"
);
std
::
cout
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
pack_input_size
;
i
++
)
{
std
::
cout
<<
packed_input
[
i
]
<<
", "
;
std
::
cout
<<
packed_input
.
get
()
[
i
]
<<
", "
;
}
std
::
cout
<<
std
::
endl
;
printf
(
"==================weight data=================
\n
"
);
...
...
@@ -142,36 +134,26 @@ void DepthWiseTestMain(ConvParameter *conv_param, float_t *input_data, float_t *
std
::
cout
<<
std
::
endl
;
printf
(
"==================output data=================
\n
"
);
std
::
cout
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
80
/*packed_output_size*/
;
i
++
)
{
for
(
int
i
=
0
;
i
<
80
/*packed_output_size*/
;
i
++
)
{
std
::
cout
<<
packed_output
[
i
]
<<
", "
;
}
std
::
cout
<<
std
::
endl
;
printf
(
"==================expected output data=================
\n
"
);
for
(
int
i
=
0
;
i
<
packed_output_size
;
i
++
)
{
std
::
cout
<<
packed_correct_data
[
i
]
<<
", "
;
std
::
cout
<<
packed_correct_data
.
get
()
[
i
]
<<
", "
;
}
std
::
cout
<<
std
::
endl
;
// compare
CommonTest
::
CompareOutputData
(
packed_output
,
packed_correct_data
,
packed_output_size
,
0.00001
);
SAFE_DELETE_ARRAY
(
packed_correct_data
)
CommonTest
::
CompareOutputData
(
packed_output
,
packed_correct_data
.
get
(),
packed_output_size
,
0.00001
);
}
inputs
[
1
]
->
SetData
(
nullptr
);
inputs
[
2
]
->
SetData
(
nullptr
);
SAFE_DELETE_ARRAY
(
packed_input
);
for
(
auto
tensor
:
inputs
)
{
SAFE_DELETE_PTR
(
tensor
)
}
for
(
auto
tensor
:
outputs
)
{
SAFE_DELETE_PTR
(
tensor
)
}
SAFE_DELETE_PTR
(
pKernel
)
SAFE_DELETE_PTR
(
pGraph
)
return
;
}
TEST_F
(
TestConvolutionDwOpenCL
,
NoPadNC4HW4Fp32
)
{
ConvParameter
*
conv_param
=
new
ConvParameter
();
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
4
;
...
...
@@ -212,12 +194,12 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) {
float
gnd_data
[]
=
{
3.3848767
,
1.4446403
,
1.8428744
,
1.3194335
,
2.5873442
,
2.1384869
,
2.04022
,
1.1872686
,
2.2294958
,
1.6570128
,
2.465089
,
1.4294086
,
2.7941442
,
1.7871612
,
2.188921
,
1.0601988
};
DepthWiseTestMain
(
conv_param
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
DepthWiseTestMain
(
conv_param
.
get
()
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
TEST_F
(
TestConvolutionDwOpenCL
,
PadNC4HW4Fp32
)
{
ConvParameter
*
conv_param
=
new
ConvParameter
();
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
3
;
...
...
@@ -285,12 +267,12 @@ TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
0.8749627
,
0.8953936
,
0.5093431
,
1.5496738
,
0.54936385
,
0.7683113
,
1.165742
,
1.3682933
,
1.0517888
,
0.59817517
,
0.75649744
,
1.2075498
,
0.38804203
};
DepthWiseTestMain
(
conv_param
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
DepthWiseTestMain
(
conv_param
.
get
()
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NC4HW4
);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
TEST_F
(
TestConvolutionDwOpenCL
,
NoPadNHWC4Fp32
)
{
ConvParameter
*
conv_param
=
new
ConvParameter
();
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
4
;
...
...
@@ -331,12 +313,12 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
float
gnd_data
[]
=
{
3.3848767
,
1.4446403
,
1.8428744
,
1.3194335
,
2.5873442
,
2.1384869
,
2.04022
,
1.1872686
,
2.2294958
,
1.6570128
,
2.465089
,
1.4294086
,
2.7941442
,
1.7871612
,
2.188921
,
1.0601988
};
DepthWiseTestMain
(
conv_param
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
DepthWiseTestMain
(
conv_param
.
get
()
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
TEST_F
(
TestConvolutionDwOpenCL
,
PadNHWC4Fp32
)
{
ConvParameter
*
conv_param
=
new
ConvParameter
();
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
3
;
...
...
@@ -404,15 +386,14 @@ TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
0.8749627
,
0.8953936
,
0.5093431
,
1.5496738
,
0.54936385
,
0.7683113
,
1.165742
,
1.3682933
,
1.0517888
,
0.59817517
,
0.75649744
,
1.2075498
,
0.38804203
};
DepthWiseTestMain
(
conv_param
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
DepthWiseTestMain
(
conv_param
.
get
()
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
TEST_F
(
TestConvolutionDwOpenCL
,
ConvDwNoPadFp32
)
{
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
ConvParameter
*
conv_param
=
new
ConvParameter
();
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
4
;
...
...
@@ -470,29 +451,26 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
std
::
vector
<
int
>
shape_bias
=
{
conv_param
->
output_channel_
};
std
::
vector
<
int
>
shape_out
=
{
conv_param
->
output_batch_
,
conv_param
->
output_h_
,
conv_param
->
output_w_
,
conv_param
->
output_channel_
};
lite
::
tensor
::
Tensor
*
tensor_a
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_in
,
schema
::
Format_NC4HW4
);
// Note!!!actual is NHWC4
lite
::
tensor
::
Tensor
*
tensor_b
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_filter
,
schema
::
Format_NHWC
);
lite
::
tensor
::
Tensor
*
tensor_c
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_bias
,
schema
::
Format_NHWC
);
lite
::
tensor
::
Tensor
*
tensor_d
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_out
,
schema
::
Format_NC4HW4
);
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
tensor_a
,
tensor_b
,
tensor_c
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
tensor_d
};
auto
tensor_a
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_in
,
schema
::
Format_NC4HW4
);
// Note!!!actual is NHWC4
auto
tensor_b
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_filter
,
schema
::
Format_NHWC
);
auto
tensor_c
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_bias
,
schema
::
Format_NHWC
);
auto
tensor_d
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_out
,
schema
::
Format_NC4HW4
);
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
tensor_a
.
get
(),
tensor_b
.
get
(),
tensor_c
.
get
()};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
tensor_d
.
get
()};
// freamework to do!!!
inputs
[
1
]
->
SetData
(
packed_weight
);
inputs
[
2
]
->
SetData
(
bias_data
);
OpParameter
*
parameter
=
reinterpret_cast
<
OpParameter
*>
(
conv_param
);
auto
*
pKernel
=
new
kernel
::
DepthwiseConv2dOpenCLKernel
(
parameter
,
inputs
,
outputs
);
OpParameter
*
parameter
=
reinterpret_cast
<
OpParameter
*>
(
conv_param
.
get
()
);
auto
pKernel
=
std
::
make_unique
<
kernel
::
DepthwiseConv2dOpenCLKernel
>
(
parameter
,
inputs
,
outputs
);
pKernel
->
Init
();
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
pKernel
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
{
tensor_a
};
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
pKernel
.
get
()
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
{
tensor_a
.
get
()
};
inputs
[
0
]
->
MallocData
();
auto
*
pGraph
=
new
kernel
::
SubGraphOpenCLKernel
(
inputs_
,
outputs
,
kernels
,
kernels
,
kernels
);
auto
pGraph
=
std
::
make_unique
<
kernel
::
SubGraphOpenCLKernel
>
(
inputs_
,
outputs
,
kernels
,
kernels
,
kernels
);
pGraph
->
Init
();
// freamework to do!!!
...
...
@@ -533,14 +511,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
inputs
[
1
]
->
SetData
(
nullptr
);
inputs
[
2
]
->
SetData
(
nullptr
);
for
(
auto
tensor
:
inputs
)
{
SAFE_DELETE_PTR
(
tensor
)
}
for
(
auto
tensor
:
outputs
)
{
SAFE_DELETE_PTR
(
tensor
)
}
SAFE_DELETE_PTR
(
pKernel
)
SAFE_DELETE_PTR
(
pGraph
)
MS_LOG
(
INFO
)
<<
"TestConvolutionDwNoPadFp32 passed"
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
...
...
@@ -548,7 +518,7 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
TEST_F
(
TestConvolutionDwOpenCL
,
ConvDwPadFp32
)
{
auto
ocl_runtime
=
lite
::
opencl
::
OpenCLRuntime
::
GetInstance
();
ocl_runtime
->
Init
();
ConvParameter
*
conv_param
=
new
ConvParameter
();
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
3
;
...
...
@@ -589,10 +559,10 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
// pack input
int
IC4
=
UP_DIV
(
conv_param
->
input_channel_
,
C4NUM
);
int
pack_input_size
=
C4NUM
*
IC4
*
conv_param
->
input_h_
*
conv_param
->
input_w_
;
float
*
packed_input
=
new
float
[
pack_input_size
]
;
memset
(
packed_input
,
0
,
pack_input_size
*
sizeof
(
float
));
auto
packed_input
=
std
::
make_unique
<
float
>
(
pack_input_size
)
;
memset
(
packed_input
.
get
()
,
0
,
pack_input_size
*
sizeof
(
float
));
int
plane
=
conv_param
->
input_w_
*
conv_param
->
input_h_
;
PackNHWCToNC4HW4Fp32
(
input_data
,
packed_input
,
1
,
plane
,
conv_param
->
input_channel_
);
PackNHWCToNC4HW4Fp32
(
input_data
,
packed_input
.
get
()
,
1
,
plane
,
conv_param
->
input_channel_
);
// co h w ci
float
weight_data
[]
=
{
0.67063785
,
0.21038257
,
0.12892629
,
0.31542835
,
0.36371076
,
0.57019675
,
0.43860152
,
0.9883738
,
...
...
@@ -634,33 +604,30 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
std
::
vector
<
int
>
shape_bias
=
{
conv_param
->
output_channel_
};
std
::
vector
<
int
>
shape_out
=
{
conv_param
->
output_batch_
,
conv_param
->
output_h_
,
conv_param
->
output_w_
,
conv_param
->
output_channel_
};
lite
::
tensor
::
Tensor
*
tensor_a
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_in
,
schema
::
Format_NC4HW4
);
// Note!!!actual is NHWC4
lite
::
tensor
::
Tensor
*
tensor_b
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_filter
,
schema
::
Format_NHWC
);
lite
::
tensor
::
Tensor
*
tensor_c
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_bias
,
schema
::
Format_NHWC
);
lite
::
tensor
::
Tensor
*
tensor_d
=
new
lite
::
tensor
::
Tensor
(
TypeId
(
kNumberTypeFloat32
),
shape_out
,
schema
::
Format_NC4HW4
);
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
tensor_a
,
tensor_b
,
tensor_c
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
tensor_d
};
auto
tensor_a
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_in
,
schema
::
Format_NC4HW4
);
// Note!!!actual is NHWC4
auto
tensor_b
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_filter
,
schema
::
Format_NHWC
);
auto
tensor_c
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_bias
,
schema
::
Format_NHWC
);
auto
tensor_d
=
std
::
make_unique
<
lite
::
tensor
::
Tensor
>
(
TypeId
(
kNumberTypeFloat32
),
shape_out
,
schema
::
Format_NC4HW4
);
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs
{
tensor_a
.
get
(),
tensor_b
.
get
(),
tensor_c
.
get
()};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
outputs
{
tensor_d
.
get
()};
// freamework to do!!!
inputs
[
1
]
->
SetData
(
packed_weight
);
inputs
[
2
]
->
SetData
(
bias_data
);
OpParameter
*
parameter
=
reinterpret_cast
<
OpParameter
*>
(
conv_param
);
auto
*
pKernel
=
new
kernel
::
DepthwiseConv2dOpenCLKernel
(
parameter
,
inputs
,
outputs
);
OpParameter
*
parameter
=
reinterpret_cast
<
OpParameter
*>
(
conv_param
.
get
()
);
auto
pKernel
=
std
::
make_unique
<
kernel
::
DepthwiseConv2dOpenCLKernel
>
(
parameter
,
inputs
,
outputs
);
pKernel
->
Init
();
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
pKernel
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
{
tensor_a
};
std
::
vector
<
kernel
::
LiteKernel
*>
kernels
{
pKernel
.
get
()
};
std
::
vector
<
lite
::
tensor
::
Tensor
*>
inputs_
{
tensor_a
.
get
()
};
inputs
[
0
]
->
MallocData
();
auto
*
pGraph
=
new
kernel
::
SubGraphOpenCLKernel
(
inputs_
,
outputs
,
kernels
,
kernels
,
kernels
);
auto
pGraph
=
std
::
make_unique
<
kernel
::
SubGraphOpenCLKernel
>
(
inputs_
,
outputs
,
kernels
,
kernels
,
kernels
);
pGraph
->
Init
();
// freamework to do!!!
memcpy
(
inputs
[
0
]
->
Data
(),
packed_input
,
sizeof
(
float
)
*
pack_input_size
);
memcpy
(
inputs
[
0
]
->
Data
(),
packed_input
.
get
()
,
sizeof
(
float
)
*
pack_input_size
);
pGraph
->
Run
();
float
*
packed_output
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
Data
());
...
...
@@ -672,15 +639,15 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
2.3769147
,
2.3185873
,
0.6133741
,
0.9687358
,
0.9987654
,
1.0254729
,
0.8368954
,
0.74171704
,
0.8749627
,
0.8953936
,
0.5093431
,
1.5496738
,
0.54936385
,
0.7683113
,
1.165742
,
1.3682933
,
1.0517888
,
0.59817517
,
0.75649744
,
1.2075498
,
0.38804203
};
float
*
packed_correct_data
=
new
float
[
packed_output_size
]
;
memset
(
packed_correct_data
,
0
,
packed_output_size
*
sizeof
(
float
));
PackNHWCToNC4HW4Fp32
(
correct_data
,
packed_correct_data
,
conv_param
->
output_batch_
,
auto
packed_correct_data
=
std
::
make_unique
<
float
>
(
packed_output_size
)
;
memset
(
packed_correct_data
.
get
()
,
0
,
packed_output_size
*
sizeof
(
float
));
PackNHWCToNC4HW4Fp32
(
correct_data
,
packed_correct_data
.
get
()
,
conv_param
->
output_batch_
,
conv_param
->
output_h_
*
conv_param
->
output_w_
,
conv_param
->
output_channel_
);
printf
(
"==================input_data=================
\n
"
);
std
::
cout
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
pack_input_size
;
i
++
)
{
std
::
cout
<<
packed_input
[
i
]
<<
", "
;
std
::
cout
<<
packed_input
.
get
()
[
i
]
<<
", "
;
}
std
::
cout
<<
std
::
endl
;
printf
(
"==================weight data=================
\n
"
);
...
...
@@ -697,93 +664,59 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
std
::
cout
<<
std
::
endl
;
printf
(
"==================expected output data=================
\n
"
);
for
(
int
i
=
0
;
i
<
packed_output_size
;
i
++
)
{
std
::
cout
<<
packed_correct_data
[
i
]
<<
", "
;
std
::
cout
<<
packed_correct_data
.
get
()
[
i
]
<<
", "
;
}
std
::
cout
<<
std
::
endl
;
// compare
CommonTest
::
CompareOutputData
(
packed_output
,
packed_correct_data
,
packed_output_size
,
0.00001
);
CommonTest
::
CompareOutputData
(
packed_output
,
packed_correct_data
.
get
()
,
packed_output_size
,
0.00001
);
inputs
[
1
]
->
SetData
(
nullptr
);
inputs
[
2
]
->
SetData
(
nullptr
);
SAFE_DELETE_ARRAY
(
packed_input
);
SAFE_DELETE_ARRAY
(
packed_correct_data
)
for
(
auto
tensor
:
inputs
)
{
SAFE_DELETE_PTR
(
tensor
)
}
for
(
auto
tensor
:
outputs
)
{
SAFE_DELETE_PTR
(
tensor
)
}
SAFE_DELETE_PTR
(
pKernel
)
SAFE_DELETE_PTR
(
pGraph
)
MS_LOG
(
INFO
)
<<
"TestConvolutionDwPadFp32 passed"
;
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
TEST_F
(
TestConvolutionDwOpenCL
,
ProfilingMobilenetv2
)
{
std
::
vector
<
std
::
vector
<
int
>>
src_shape
{
{
1
,
32
,
112
,
112
},
{
1
,
96
,
112
,
112
},
{
1
,
144
,
56
,
56
},
{
1
,
144
,
56
,
56
},
{
1
,
192
,
28
,
28
},
{
1
,
192
,
28
,
28
},
{
1
,
384
,
14
,
14
},
{
1
,
576
,
14
,
14
},
{
1
,
576
,
14
,
14
},
{
1
,
960
,
7
,
7
},
{
1
,
32
,
112
,
112
},
{
1
,
96
,
112
,
112
},
{
1
,
144
,
56
,
56
},
{
1
,
144
,
56
,
56
},
{
1
,
192
,
28
,
28
},
{
1
,
192
,
28
,
28
},
{
1
,
384
,
14
,
14
},
{
1
,
576
,
14
,
14
},
{
1
,
576
,
14
,
14
},
{
1
,
960
,
7
,
7
},
};
std
::
vector
<
std
::
vector
<
int
>>
dst_shape
{
{
1
,
32
,
112
,
112
},
{
1
,
96
,
56
,
56
},
{
1
,
144
,
56
,
56
},
{
1
,
144
,
28
,
28
},
{
1
,
192
,
28
,
28
},
{
1
,
192
,
14
,
14
},
{
1
,
384
,
14
,
14
},
{
1
,
576
,
14
,
14
},
{
1
,
576
,
7
,
7
},
{
1
,
960
,
7
,
7
},
{
1
,
32
,
112
,
112
},
{
1
,
96
,
56
,
56
},
{
1
,
144
,
56
,
56
},
{
1
,
144
,
28
,
28
},
{
1
,
192
,
28
,
28
},
{
1
,
192
,
14
,
14
},
{
1
,
384
,
14
,
14
},
{
1
,
576
,
14
,
14
},
{
1
,
576
,
7
,
7
},
{
1
,
960
,
7
,
7
},
};
std
::
vector
<
std
::
vector
<
int
>>
filter_shape
{
{
32
,
1
,
1
,
1
},
{
96
,
3
,
3
,
1
},
{
144
,
1
,
1
,
1
},
{
144
,
3
,
3
,
1
},
{
192
,
1
,
1
,
1
},
{
192
,
3
,
3
,
1
},
{
384
,
1
,
1
,
1
},
{
576
,
1
,
1
,
1
},
{
576
,
3
,
3
,
1
},
{
960
,
1
,
1
,
1
},
{
32
,
1
,
1
,
1
},
{
96
,
3
,
3
,
1
},
{
144
,
1
,
1
,
1
},
{
144
,
3
,
3
,
1
},
{
192
,
1
,
1
,
1
},
{
192
,
3
,
3
,
1
},
{
384
,
1
,
1
,
1
},
{
576
,
1
,
1
,
1
},
{
576
,
3
,
3
,
1
},
{
960
,
1
,
1
,
1
},
};
// nhwc
size_t
in_size
=
96
*
112
*
112
;
float_t
*
input_data
=
new
float_t
[
in_size
]
;
memset
(
input_data
,
0
,
in_size
);
size_t
in_size
=
96
*
112
*
112
;
auto
input_data
=
std
::
make_unique
<
float_t
>
(
in_size
)
;
memset
(
input_data
.
get
()
,
0
,
in_size
);
for
(
auto
i
=
0
;
i
<
in_size
;
++
i
)
{
input_data
[
i
]
=
1
;
input_data
.
get
()
[
i
]
=
1
;
}
// co h w ci
size_t
wt_size
=
576
*
3
*
3
;
float_t
*
weight_data
=
new
float_t
[
wt_size
]
;
memset
(
weight_data
,
0
,
wt_size
);
size_t
wt_size
=
576
*
3
*
3
;
auto
weight_data
=
std
::
make_unique
<
float_t
>
(
wt_size
)
;
memset
(
weight_data
.
get
()
,
0
,
wt_size
);
for
(
auto
i
=
0
;
i
<
wt_size
;
++
i
)
{
weight_data
[
i
]
=
1
;
}
size_t
out_size
=
96
*
112
*
112
;
float_t
*
gnd_data
=
new
float_t
[
out_size
]
;
memset
(
gnd_data
,
0
,
out_size
);
// for (auto i = 0; i < in_size; ++i) {
// gnd_data[i] = 1;
// }
weight_data
.
get
()
[
i
]
=
1
;
}
size_t
out_size
=
96
*
112
*
112
;
auto
gnd_data
=
std
::
make_unique
<
float_t
>
(
out_size
)
;
memset
(
gnd_data
.
get
()
,
0
,
out_size
);
// for (auto i = 0; i < in_size; ++i) {
// gnd_data[i] = 1;
// }
for
(
size_t
i
=
0
;
i
<
src_shape
.
size
();
++
i
)
{
const
int
MAX_RUN_TIMES
=
1
;
for
(
int
j
=
0
;
j
<
MAX_RUN_TIMES
;
++
j
)
{
printf
(
"========profiling depthwise, in shape(%d,%d,%d,%d), out shape(%d,%d,%d,%d), iter%d========
\n
"
,
src_shape
[
i
][
0
],
src_shape
[
i
][
1
],
src_shape
[
i
][
2
],
src_shape
[
i
][
3
],
dst_shape
[
i
][
0
],
dst_shape
[
i
][
1
],
dst_shape
[
i
][
2
],
dst_shape
[
i
][
3
],
j
);
ConvParameter
*
conv_param
=
new
ConvParameter
();
src_shape
[
i
][
0
],
src_shape
[
i
][
1
],
src_shape
[
i
][
2
],
src_shape
[
i
][
3
],
dst_shape
[
i
][
0
],
dst_shape
[
i
][
1
],
dst_shape
[
i
][
2
],
dst_shape
[
i
][
3
],
j
);
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
src_shape
[
i
][
2
];
...
...
@@ -795,19 +728,17 @@ TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
conv_param
->
output_channel_
=
dst_shape
[
i
][
1
];
conv_param
->
kernel_h_
=
filter_shape
[
i
][
1
];
conv_param
->
kernel_w_
=
filter_shape
[
i
][
2
];
conv_param
->
stride_h_
=
conv_param
->
output_h_
/
conv_param
->
input_h_
;
conv_param
->
stride_w_
=
conv_param
->
output_w_
/
conv_param
->
input_w_
;
conv_param
->
pad_h_
=
(
conv_param
->
kernel_h_
-
1
)
/
2
;
conv_param
->
pad_w_
=
(
conv_param
->
kernel_w_
-
1
)
/
2
;
conv_param
->
stride_h_
=
conv_param
->
output_h_
/
conv_param
->
input_h_
;
conv_param
->
stride_w_
=
conv_param
->
output_w_
/
conv_param
->
input_w_
;
conv_param
->
pad_h_
=
(
conv_param
->
kernel_h_
-
1
)
/
2
;
conv_param
->
pad_w_
=
(
conv_param
->
kernel_w_
-
1
)
/
2
;
conv_param
->
dilation_h_
=
1
;
conv_param
->
dilation_w_
=
1
;
}
// DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, false);
DepthWiseTestMain
(
conv_param
,
input_data
,
weight_data
,
nullptr
,
schema
::
Format_NHWC4
,
false
);
// DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, false);
DepthWiseTestMain
(
conv_param
.
get
(),
input_data
.
get
(),
weight_data
.
get
()
,
nullptr
,
schema
::
Format_NHWC4
,
false
);
}
}
SAFE_DELETE_ARRAY
(
input_data
);
SAFE_DELETE_ARRAY
(
weight_data
);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
...
...
@@ -817,26 +748,26 @@ TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
std
::
vector
<
int
>
filter_shape
{
96
,
3
,
3
,
1
};
// nhwc
size_t
in_size
=
96
*
112
*
112
;
float_t
*
input_data
=
new
float_t
[
in_size
]
;
memset
(
input_data
,
0
,
in_size
);
size_t
in_size
=
96
*
112
*
112
;
auto
input_data
=
std
::
make_unique
<
float_t
>
(
in_size
)
;
memset
(
input_data
.
get
()
,
0
,
in_size
);
for
(
auto
i
=
0
;
i
<
in_size
;
++
i
)
{
input_data
[
i
]
=
1
;
input_data
.
get
()
[
i
]
=
1
;
}
// co h w ci
size_t
wt_size
=
576
*
3
*
3
;
float_t
*
weight_data
=
new
float_t
[
wt_size
]
;
memset
(
weight_data
,
0
,
wt_size
);
size_t
wt_size
=
576
*
3
*
3
;
auto
weight_data
=
std
::
make_unique
<
float_t
>
(
wt_size
)
;
memset
(
weight_data
.
get
()
,
0
,
wt_size
);
for
(
auto
i
=
0
;
i
<
wt_size
;
++
i
)
{
weight_data
[
i
]
=
1
;
}
size_t
out_size
=
96
*
112
*
112
;
float_t
*
gnd_data
=
new
float_t
[
out_size
]
;
memset
(
gnd_data
,
0
,
out_size
);
// for (auto i = 0; i < in_size; ++i) {
// gnd_data[i] = 1;
// }
ConvParameter
*
conv_param
=
new
ConvParameter
();
weight_data
.
get
()
[
i
]
=
1
;
}
size_t
out_size
=
96
*
112
*
112
;
auto
gnd_data
=
std
::
make_unique
<
float_t
>
(
out_size
)
;
memset
(
gnd_data
.
get
()
,
0
,
out_size
);
// for (auto i = 0; i < in_size; ++i) {
// gnd_data[i] = 1;
// }
auto
conv_param
=
std
::
make_unique
<
ConvParameter
>
();
{
conv_param
->
input_batch_
=
1
;
conv_param
->
input_h_
=
src_shape
[
2
];
...
...
@@ -848,17 +779,15 @@ TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
conv_param
->
output_channel_
=
dst_shape
[
1
];
conv_param
->
kernel_h_
=
filter_shape
[
1
];
conv_param
->
kernel_w_
=
filter_shape
[
2
];
conv_param
->
stride_h_
=
conv_param
->
output_h_
/
conv_param
->
input_h_
;
conv_param
->
stride_w_
=
conv_param
->
output_w_
/
conv_param
->
input_w_
;
conv_param
->
pad_h_
=
(
conv_param
->
kernel_h_
-
1
)
/
2
;
conv_param
->
pad_w_
=
(
conv_param
->
kernel_w_
-
1
)
/
2
;
conv_param
->
stride_h_
=
conv_param
->
output_h_
/
conv_param
->
input_h_
;
conv_param
->
stride_w_
=
conv_param
->
output_w_
/
conv_param
->
input_w_
;
conv_param
->
pad_h_
=
(
conv_param
->
kernel_h_
-
1
)
/
2
;
conv_param
->
pad_w_
=
(
conv_param
->
kernel_w_
-
1
)
/
2
;
conv_param
->
dilation_h_
=
1
;
conv_param
->
dilation_w_
=
1
;
}
// DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, true);
DepthWiseTestMain
(
conv_param
,
input_data
,
weight_data
,
gnd_data
,
schema
::
Format_NHWC4
,
true
);
SAFE_DELETE_ARRAY
(
input_data
);
SAFE_DELETE_ARRAY
(
weight_data
);
// DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, true);
DepthWiseTestMain
(
conv_param
.
get
(),
input_data
.
get
(),
weight_data
.
get
(),
gnd_data
.
get
(),
schema
::
Format_NHWC4
,
true
);
lite
::
opencl
::
OpenCLRuntime
::
DeleteInstance
();
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录