Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
冰之2023
Mace
提交
eefc9d01
Mace
项目概览
冰之2023
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
eefc9d01
编写于
3月 29, 2018
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update cl source code format, fix pooling op core dump
上级
08a362e7
变更
42
隐藏空白更改
内联
并排
Showing
42 changed file
with
355 addition
and
362 deletion
+355
-362
mace/kernels/opencl/activation_opencl.cc
mace/kernels/opencl/activation_opencl.cc
+5
-3
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+4
-2
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+5
-3
mace/kernels/opencl/bias_add_opencl.cc
mace/kernels/opencl/bias_add_opencl.cc
+5
-3
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+4
-2
mace/kernels/opencl/channel_shuffle.cc
mace/kernels/opencl/channel_shuffle.cc
+5
-3
mace/kernels/opencl/cl/activation.cl
mace/kernels/opencl/cl/activation.cl
+7
-9
mace/kernels/opencl/cl/addn.cl
mace/kernels/opencl/cl/addn.cl
+6
-8
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+7
-9
mace/kernels/opencl/cl/bias_add.cl
mace/kernels/opencl/cl/bias_add.cl
+5
-6
mace/kernels/opencl/cl/buffer_to_image.cl
mace/kernels/opencl/cl/buffer_to_image.cl
+72
-96
mace/kernels/opencl/cl/channel_shuffle.cl
mace/kernels/opencl/cl/channel_shuffle.cl
+8
-10
mace/kernels/opencl/cl/concat.cl
mace/kernels/opencl/cl/concat.cl
+11
-15
mace/kernels/opencl/cl/conv_2d.cl
mace/kernels/opencl/cl/conv_2d.cl
+7
-9
mace/kernels/opencl/cl/conv_2d_1x1.cl
mace/kernels/opencl/cl/conv_2d_1x1.cl
+7
-9
mace/kernels/opencl/cl/conv_2d_3x3.cl
mace/kernels/opencl/cl/conv_2d_3x3.cl
+7
-9
mace/kernels/opencl/cl/depth_to_space.cl
mace/kernels/opencl/cl/depth_to_space.cl
+12
-15
mace/kernels/opencl/cl/depthwise_conv2d.cl
mace/kernels/opencl/cl/depthwise_conv2d.cl
+14
-18
mace/kernels/opencl/cl/eltwise.cl
mace/kernels/opencl/cl/eltwise.cl
+6
-8
mace/kernels/opencl/cl/matmul.cl
mace/kernels/opencl/cl/matmul.cl
+6
-8
mace/kernels/opencl/cl/pooling.cl
mace/kernels/opencl/cl/pooling.cl
+7
-8
mace/kernels/opencl/cl/resize_bilinear.cl
mace/kernels/opencl/cl/resize_bilinear.cl
+7
-8
mace/kernels/opencl/cl/slice.cl
mace/kernels/opencl/cl/slice.cl
+5
-7
mace/kernels/opencl/cl/softmax.cl
mace/kernels/opencl/cl/softmax.cl
+6
-8
mace/kernels/opencl/cl/space_to_batch.cl
mace/kernels/opencl/cl/space_to_batch.cl
+14
-18
mace/kernels/opencl/cl/winograd_transform.cl
mace/kernels/opencl/cl/winograd_transform.cl
+12
-16
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+10
-6
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+5
-3
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+5
-3
mace/kernels/opencl/conv_2d_opencl_general.cc
mace/kernels/opencl/conv_2d_opencl_general.cc
+5
-3
mace/kernels/opencl/depth_to_space_opencl.cc
mace/kernels/opencl/depth_to_space_opencl.cc
+10
-9
mace/kernels/opencl/depthwise_conv_opencl.cc
mace/kernels/opencl/depthwise_conv_opencl.cc
+5
-3
mace/kernels/opencl/eltwise_opencl.cc
mace/kernels/opencl/eltwise_opencl.cc
+4
-2
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+4
-2
mace/kernels/opencl/pooling_opencl.cc
mace/kernels/opencl/pooling_opencl.cc
+17
-3
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+5
-3
mace/kernels/opencl/slice.cc
mace/kernels/opencl/slice.cc
+5
-3
mace/kernels/opencl/softmax_opencl.cc
mace/kernels/opencl/softmax_opencl.cc
+5
-3
mace/kernels/opencl/space_to_batch_opencl.cc
mace/kernels/opencl/space_to_batch_opencl.cc
+5
-3
mace/kernels/opencl/winograd_transform.cc
mace/kernels/opencl/winograd_transform.cc
+8
-4
tools/mace_tools.py
tools/mace_tools.py
+2
-1
tools/packaging_lib.sh
tools/packaging_lib.sh
+6
-1
未找到文件。
mace/kernels/opencl/activation_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -71,6 +71,11 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
int
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
if
(
activation_
==
PRELU
)
{
MACE_CHECK_NOTNULL
(
alpha
);
...
...
@@ -78,9 +83,6 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
.
setArg
(
idx
++
,
static_cast
<
float
>
(
relux_max_limit_
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
input
->
shape
();
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
eefc9d01
...
...
@@ -70,12 +70,14 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
}
for
(
auto
input
:
input_tensors
)
{
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
}
kernel_
.
setArg
(
idx
++
,
*
(
output_tensor
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
input_shape_
=
input_tensors
[
0
]
->
shape
();
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -75,6 +75,11 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
scale
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
offset
->
opencl_image
()));
...
...
@@ -85,9 +90,6 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
input
->
shape
();
...
...
mace/kernels/opencl/bias_add_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -45,12 +45,14 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
input
->
shape
();
kwg_size_
=
...
...
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
eefc9d01
...
...
@@ -87,6 +87,10 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
obfuscated_kernel_name
,
built_options
);
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported
)
{
b2f_kernel
.
setArg
(
idx
++
,
gws
[
0
]);
b2f_kernel
.
setArg
(
idx
++
,
gws
[
1
]);
}
b2f_kernel
.
setArg
(
idx
++
,
*
(
buffer
->
opencl_buffer
()));
if
(
!
i2b_
)
{
MACE_CHECK
(
buffer
->
buffer_offset
()
%
GetEnumTypeSize
(
buffer
->
dtype
())
==
0
,
...
...
@@ -112,8 +116,6 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
dim
(
3
)));
}
b2f_kernel
.
setArg
(
idx
++
,
*
(
image
->
opencl_image
()));
b2f_kernel
.
setArg
(
idx
++
,
gws
[
0
]);
b2f_kernel
.
setArg
(
idx
++
,
gws
[
1
]);
const
uint32_t
kwg_size
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
b2f_kernel
));
...
...
mace/kernels/opencl/channel_shuffle.cc
浏览文件 @
eefc9d01
...
...
@@ -54,13 +54,15 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
groups_
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
channels_per_group
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
input
->
shape
();
...
...
mace/kernels/opencl/cl/activation.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
activation
(
__read_only
image2d_t
input,
__kernel
void
activation
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
#
ifdef
USE_PRELU
__read_only
image2d_t
alpha,
#
endif
__private
const
float
relux_max_limit,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
#
endif
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/addn.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
addn
(
__read_only
image2d_t
input0,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__kernel
void
addn
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
#
endif
__read_only
image2d_t
input0,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
input1,
#
if
INPUT_NUM
>
2
__read_only
image2d_t
input2,
...
...
@@ -8,14 +13,7 @@ __kernel void addn(__read_only image2d_t input0, /* [c%4 * w * c/4, h * b] */
#
if
INPUT_NUM
>
3
__read_only
image2d_t
input3,
#
endif
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1
)
{
#
else
__write_only
image2d_t
output
)
{
#
endif
const
int
w
=
get_global_id
(
0
)
;
const
int
hb
=
get_global_id
(
1
)
;
...
...
mace/kernels/opencl/cl/batch_norm.cl
浏览文件 @
eefc9d01
#
include
<common.h>
//
Supported
data
types:
half/float
__kernel
void
batch_norm
(
__read_only
image2d_t
input,
__kernel
void
batch_norm
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
__read_only
image2d_t
scale,
__read_only
image2d_t
offset,
#
ifndef
FOLDED_CONSTANT
...
...
@@ -9,15 +15,7 @@ __kernel void batch_norm(__read_only image2d_t input,
__private
const
float
epsilon,
#
endif
__write_only
image2d_t
output,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
float
relux_max_limit,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__private
const
float
relux_max_limit
)
{
#
endif
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/bias_add.cl
浏览文件 @
eefc9d01
#
include
<common.h>
//
Supported
data
types:
half/float
__kernel
void
bias_add
(
__read_only
image2d_t
input,
__read_only
image2d_t
bias,
__kernel
void
bias_add
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
__read_only
image2d_t
bias,
__write_only
image2d_t
output
)
{
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/buffer_to_image.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
filter_buffer_to_image
(
__global
const
DATA_TYPE
*input,
/*
h,
w,
oc,
ic
*/
__kernel
void
filter_buffer_to_image
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
#
endif
__global
const
DATA_TYPE
*input,
/*
h,
w,
oc,
ic
*/
__private
const
int
input_offset,
__private
const
int
filter_h,
__private
const
int
filter_w,
__private
const
int
out_channel,
__private
const
int
in_channel,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1
)
{
#
else
__write_only
image2d_t
output
)
{
#
endif
int
w
=
get_global_id
(
0
)
;
int
h
=
get_global_id
(
1
)
;
...
...
@@ -58,19 +56,17 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, o
WRITE_IMAGET(output, coord, values);
}
__kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic */
__kernel void filter_image_to_buffer(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global DATA_TYPE *output, /* h, w, oc, ic */
__private const int filter_h,
__private const int filter_w,
__private const int out_channel,
__private const int in_channel,
#ifndef USE_QUALCOMM_OPENCL_2_0
__read_only image2d_t input,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__read_only image2d_t input) {
#endif
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -112,19 +108,17 @@ __kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic
}
}
__kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, ic, m */
__kernel void dw_filter_buffer_to_image(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global const DATA_TYPE *input, /* h, w, ic, m */
__private const int input_offset,
__private const int filter_w,
__private const int in_channel,
__private const int multiplier,
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1) { /* ic%4 * kh * kw * m, ic/4 */
#else
__write_only image2d_t output) {
#endif
__write_only image2d_t output) { /* ic%4 * kh * kw * m, ic/4 */
const int w = get_global_id(0);
const int h = get_global_id(1);
...
...
@@ -175,19 +169,17 @@ __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w
WRITE_IMAGET(output, coord, values);
}
__kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
__kernel void in_out_buffer_to_image(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global const DATA_TYPE *input, /* nhwc */
__private const int input_offset,
__private const int height,
__private const int width,
__private const int channels,
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__write_only image2d_t output) {
#endif
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -222,18 +214,16 @@ __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
WRITE_IMAGET(output, coord, values);
}
__kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
__kernel void in_out_image_to_buffer(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global DATA_TYPE *output, /* nhwc */
__private const int height,
__private const int width,
__private const int channels,
#ifndef USE_QUALCOMM_OPENCL_2_0
__read_only image2d_t input,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__read_only image2d_t input) {
#endif
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -267,17 +257,15 @@ __kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
}
}
__kernel void arg_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
__private const int input_offset,
__private const int count,
__kernel void arg_buffer_to_image(
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__write_only image2d_t output) {
__private const int global_size_dim1,
#endif
__global const DATA_TYPE *input, /* nhwc */
__private const int input_offset,
__private const int count,
__write_only image2d_t output) {
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -308,16 +296,14 @@ __kernel void arg_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
WRITE_IMAGET(output, coord, values);
}
__kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
__private const int count,
__kernel void arg_image_to_buffer(
#ifndef USE_QUALCOMM_OPENCL_2_0
__read_only image2d_t input,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__read_only image2d_t input) {
__private const int global_size_dim1,
#endif
__global DATA_TYPE *output, /* nhwc */
__private const int count,
__read_only image2d_t input) {
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -347,19 +333,17 @@ __kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
}
__kernel void in_out_height_buffer_to_image(__global const DATA_TYPE *input, //nhwc
__kernel void in_out_height_buffer_to_image(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global const DATA_TYPE *input, //nhwc
__private const int input_offset,
__private const int height,
__private const int width,
__private const int channels,
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__write_only image2d_t output) {
#endif
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -395,18 +379,16 @@ __kernel void in_out_height_buffer_to_image(__global const DATA_TYPE *input, //n
WRITE_IMAGET(output, coord, values);
}
__kernel void in_out_height_image_to_buffer(__global DATA_TYPE *output, //nhwc
__kernel void in_out_height_image_to_buffer(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global DATA_TYPE *output, //nhwc
__private const int height,
__private const int width,
__private const int channels,
#ifndef USE_QUALCOMM_OPENCL_2_0
__read_only image2d_t input,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__read_only image2d_t input) {
#endif
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -439,19 +421,17 @@ __kernel void in_out_height_image_to_buffer(__global DATA_TYPE *output, //nhwc
}
__kernel void in_out_width_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
__kernel void in_out_width_buffer_to_image(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global const DATA_TYPE *input, /* nhwc */
__private const int input_offset,
__private const int height,
__private const int width,
__private const int channels,
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__write_only image2d_t output) {
#endif
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -487,19 +467,17 @@ __kernel void in_out_width_buffer_to_image(__global const DATA_TYPE *input, /* n
}
// only support 3x3 now
__kernel void winograd_filter_buffer_to_image(__global const DATA_TYPE *input, //Oc, Ic, H, W
__kernel void winograd_filter_buffer_to_image(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global const DATA_TYPE *input, //Oc, Ic, H, W
__private const int input_offset,
__private const int in_channels,
__private const int height,
__private const int width,
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__write_only image2d_t output) {
#endif
int w = get_global_id(0);
int h = get_global_id(1);
...
...
@@ -584,18 +562,16 @@ __kernel void winograd_filter_buffer_to_image(__global const DATA_TYPE *input, /
}
// only support 3x3 now
__kernel void winograd_filter_image_to_buffer(__global DATA_TYPE *output, //Oc, Ic, H, W
__kernel void winograd_filter_image_to_buffer(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__global DATA_TYPE *output, //Oc, Ic, H, W
__private const int height,
__private const int width,
__private const int channel,
#ifndef USE_QUALCOMM_OPENCL_2_0
__read_only image2d_t input,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__read_only image2d_t input) {
#endif
const int w = get_global_id(0);
const int h = get_global_id(1);
...
...
mace/kernels/opencl/cl/channel_shuffle.cl
浏览文件 @
eefc9d01
#
include
<common.h>
//
assume
channes_per_group
mod
4
=
0
&&
groups
mod
4
==
0
__kernel
void
channel_shuffle
(
__read_only
image2d_t
input,
__private
const
int
groups,
__private
const
int
channels_per_group,
__kernel
void
channel_shuffle
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
__private
const
int
groups,
__private
const
int
channels_per_group,
__write_only
image2d_t
output
)
{
const
int
group_chan_blk_idx
=
get_global_id
(
0
)
;
const
int
width_idx
=
get_global_id
(
1
)
;
const
int
hb_idx
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/concat.cl
浏览文件 @
eefc9d01
...
...
@@ -22,18 +22,16 @@ DATA_TYPE4 stitch_vector(DATA_TYPE4 left,
}
//
Supported
data
type:
half/float
__kernel
void
concat_channel
(
__read_only
image2d_t
input0,
__read_only
image2d_t
input1,
__private
const
int
input0_chan,
__kernel
void
concat_channel
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input0,
__read_only
image2d_t
input1,
__private
const
int
input0_chan,
__write_only
image2d_t
output
)
{
const
int
chan_blk_idx
=
get_global_id
(
0
)
;
const
int
width_idx
=
get_global_id
(
1
)
;
const
int
hb_idx
=
get_global_id
(
2
)
;
...
...
@@ -90,17 +88,15 @@ __kernel void concat_channel(__read_only image2d_t input0,
}
// Required: All input channels are divisible by 4
__kernel void concat_channel_multi(__read_only image2d_t input,
__private const int chan_blk_offset,
__kernel void concat_channel_multi(
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2) {
#else
__write_only image2d_t output) {
__private const int global_size_dim2,
#endif
__read_only image2d_t input,
__private const int chan_blk_offset,
__write_only image2d_t output) {
const int chan_blk_idx = get_global_id(0);
const int width_idx = get_global_id(1);
const int hb_idx = get_global_id(2);
...
...
mace/kernels/opencl/cl/conv_2d.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
conv_2d
(
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__kernel
void
conv_2d
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
filter,
/*
cout%4
*
cin,
kh
*
kw
*
cout/4
*/
#
ifdef
BIAS
__read_only
image2d_t
bias,
/*
cout%4
*
cout/4
*/
...
...
@@ -18,15 +24,7 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
__private
const
int
padding_top,
__private
const
int
padding_left,
__private
const
int
dilation_h,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
dilation_w,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__private
const
int
dilation_w
)
{
#
endif
const
int
out_ch_blk
=
get_global_id
(
0
)
;
const
int
out_w_blk
=
get_global_id
(
1
)
;
const
int
out_hb
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/conv_2d_1x1.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
conv_2d_1x1
(
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__kernel
void
conv_2d_1x1
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
filter,
/*
cout%4
*
cin,
cout/4
*/
#
ifdef
BIAS
__read_only
image2d_t
bias,
/*
cout%4
*
cout/4
*/
...
...
@@ -12,15 +18,7 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
__private
const
int
in_ch_blks,
__private
const
int
height,
__private
const
int
width,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
stride,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__private
const
int
stride
)
{
#
endif
const
int
out_ch_blk
=
get_global_id
(
0
)
;
const
int
out_w_blk
=
get_global_id
(
1
)
;
const
int
out_hb
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/conv_2d_3x3.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
conv_2d_3x3
(
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__kernel
void
conv_2d_3x3
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
filter,
/*
cout%4
*
cin
,
kh
*
kw
*
cout/4
*/
#
ifdef
BIAS
__read_only
image2d_t
bias,
/*
cout%4
*
cout/4
*/
...
...
@@ -16,15 +22,7 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
__private
const
int
padding_top,
__private
const
int
padding_left,
__private
const
int
dilation_h,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
dilation_w,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__private
const
int
dilation_w
)
{
#
endif
const
int
out_ch_blk
=
get_global_id
(
0
)
;
const
int
out_w_blk
=
get_global_id
(
1
)
;
const
int
out_hb
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/depth_to_space.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
depth_to_space
(
__read_only
image2d_t
input,
__private
const
int
block_size,
__private
const
int
output_depth,
__kernel
void
depth_to_space
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
__private
const
int
block_size,
__private
const
int
output_depth,
__write_only
image2d_t
output
)
{
const
int
out_d
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_h
=
get_global_id
(
2
)
;
...
...
@@ -44,17 +42,16 @@ __kernel void depth_to_space(__read_only image2d_t input,
WRITE_IMAGET(output, (int2)(out_pos, out_h), in_data);
}
__kernel void space_to_depth(__read_only image2d_t input,
__private const int block_size,
__private const int input_depth,
__kernel void space_to_depth(
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2) {
#else
__write_only image2d_t output) {
__private const int global_size_dim2,
#endif
__read_only image2d_t input,
__private const int block_size,
__private const int input_depth,
__write_only image2d_t output) {
const int d = get_global_id(0);
const int w = get_global_id(1);
...
...
mace/kernels/opencl/cl/depthwise_conv2d.cl
浏览文件 @
eefc9d01
#
include
<common.h>
//
Only
multiplier
=
1
is
supported
__kernel
void
depthwise_conv2d
(
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__kernel
void
depthwise_conv2d
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
filter,
/*
cout%4
*
kh
*
kw
*
m,
cin/4
*/
#
ifdef
BIAS
__read_only
image2d_t
bias,
/*
cout%4
*
cout/4
*/
...
...
@@ -18,15 +24,7 @@ __kernel void depthwise_conv2d(__read_only image2d_t input, /* [c%4 * w * c/4, h
__private
const
short
padding_top,
__private
const
short
padding_left,
__private
const
short
dilation_h,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
short
dilation_w,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__private
const
short
dilation_w
)
{
#
endif
const
short
out_ch_blk
=
get_global_id
(
0
)
;
const
short
out_w_blk
=
get_global_id
(
1
)
;
const
short
out_hb
=
get_global_id
(
2
)
;
...
...
@@ -144,7 +142,13 @@ __kernel void depthwise_conv2d(__read_only image2d_t input, /* [c%4 * w * c/4, h
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3);
}
__kernel void depthwise_conv2d_s1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
__kernel void depthwise_conv2d_s1(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2,
#endif
__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
__read_only image2d_t filter, /* cout%4 * kh * kw * m, cin/4 */
#ifdef BIAS
__read_only image2d_t bias, /* cout%4 * cout/4 */
...
...
@@ -159,15 +163,7 @@ __kernel void depthwise_conv2d_s1(__read_only image2d_t input, /* [c%4 * w * c/4
__private const short filter_height,
__private const short filter_width,
__private const short padding_top,
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const short padding_left,
__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2) {
#else
__private const short padding_left) {
#endif
const short out_ch_blk = get_global_id(0);
const short out_w_blk = get_global_id(1) << 2;
const short out_hb = get_global_id(2);
...
...
mace/kernels/opencl/cl/eltwise.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
eltwise
(
__read_only
image2d_t
input0,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__kernel
void
eltwise
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
#
endif
__read_only
image2d_t
input0,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
input1,
#
ifdef
COEFF_SUM
__private
const
float
coeff0,
__private
const
float
coeff1,
#
endif
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1
)
{
#
else
__write_only
image2d_t
output
)
{
#
endif
const
int
w
=
get_global_id
(
0
)
;
const
int
hb
=
get_global_id
(
1
)
;
...
...
mace/kernels/opencl/cl/matmul.cl
浏览文件 @
eefc9d01
#
include
<common.h>
//
C
=
A
*
B
__kernel
void
matmul
(
__read_only
image2d_t
A,
__kernel
void
matmul
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
#
endif
__read_only
image2d_t
A,
__read_only
image2d_t
B,
__write_only
image2d_t
C,
__private
const
int
M,
__private
const
int
N,
__private
const
int
K,
__private
const
int
height_blocks,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
k_blocks,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1
)
{
#
else
__private
const
int
k_blocks
)
{
#
endif
const
int
gx
=
get_global_id
(
0
)
<<
2
;
const
int
hb
=
get_global_id
(
1
)
;
...
...
mace/kernels/opencl/cl/pooling.cl
浏览文件 @
eefc9d01
...
...
@@ -19,7 +19,13 @@ inline int calculate_avg_block_size(const int pool_size,
}
//
Supported
data
type:
half/float
__kernel
void
pooling
(
__read_only
image2d_t
input,
__kernel
void
pooling
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
...
...
@@ -27,14 +33,7 @@ __kernel void pooling(__read_only image2d_t input,
__private
const
int
pad_left,
__private
const
int
stride,
__private
const
int
pooling_size,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
#
endif
const
int
out_chan_idx
=
get_global_id
(
0
)
;
const
int
out_width_idx
=
get_global_id
(
1
)
;
...
...
mace/kernels/opencl/cl/resize_bilinear.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
resize_bilinear_nocache
(
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__kernel
void
resize_bilinear_nocache
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__write_only
image2d_t
output,
__private
const
float
height_scale,
__private
const
float
width_scale,
__private
const
int
in_height,
__private
const
int
in_width,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
out_height,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__private
const
int
out_height
)
{
#
endif
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
...
...
mace/kernels/opencl/cl/slice.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
slice
(
__read_only
image2d_t
input,
__private
const
int
chan_blk_offset,
__kernel
void
slice
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
__private
const
int
chan_blk_offset,
__write_only
image2d_t
output
)
{
const
int
chan_blk_idx
=
get_global_id
(
0
)
;
const
int
width_idx
=
get_global_id
(
1
)
;
const
int
hb_idx
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/softmax.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
softmax
(
__read_only
image2d_t
input,
__private
const
int
channels,
__private
const
int
remain_channels,
__kernel
void
softmax
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
input,
__private
const
int
channels,
__private
const
int
remain_channels,
__write_only
image2d_t
output
)
{
const
int
chan_blk_idx
=
get_global_id
(
0
)
;
const
int
width_idx
=
get_global_id
(
1
)
;
const
int
hb_idx
=
get_global_id
(
2
)
;
...
...
mace/kernels/opencl/cl/space_to_batch.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
space_to_batch
(
__read_only
image2d_t
space_data,
__kernel
void
space_to_batch
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2,
#
endif
__read_only
image2d_t
space_data,
__write_only
image2d_t
batch_data,
__private
const
int
block_height,
__private
const
int
block_width,
...
...
@@ -9,15 +15,7 @@ __kernel void space_to_batch(__read_only image2d_t space_data,
__private
const
int
space_height,
__private
const
int
space_width,
__private
const
int
batch_height,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
batch_width,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__private
const
int
batch_width
)
{
#
endif
const
int
chan_idx
=
get_global_id
(
0
)
;
const
int
batch_w_idx
=
get_global_id
(
1
)
;
const
int
batch_hb_idx
=
get_global_id
(
2
)
;
...
...
@@ -54,7 +52,13 @@ __kernel void space_to_batch(__read_only image2d_t space_data,
WRITE_IMAGET(batch_data, batch_coord, value);
}
__kernel void batch_to_space(__read_only image2d_t batch_data,
__kernel void batch_to_space(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2,
#endif
__read_only image2d_t batch_data,
__write_only image2d_t space_data,
__private const int block_height,
__private const int block_width,
...
...
@@ -63,15 +67,7 @@ __kernel void batch_to_space(__read_only image2d_t batch_data,
__private const int space_height,
__private const int space_width,
__private const int batch_height,
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int batch_width,
__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2) {
#else
__private const int batch_width) {
#endif
const int chan_idx = get_global_id(0);
const int batch_w_idx = get_global_id(1);
const int batch_hb_idx = get_global_id(2);
...
...
mace/kernels/opencl/cl/winograd_transform.cl
浏览文件 @
eefc9d01
#
include
<common.h>
__kernel
void
winograd_transform_2x2
(
__read_only
image2d_t
input,
__kernel
void
winograd_transform_2x2
(
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
#
endif
__read_only
image2d_t
input,
__write_only
image2d_t
output,
__private
const
int
in_height,
__private
const
int
in_width,
...
...
@@ -8,14 +13,7 @@ __kernel void winograd_transform_2x2(__read_only image2d_t input,
__private
const
int
round_hw,
__private
const
int
round_w,
__private
const
int
padding_top,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__private
const
int
padding_left,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1
)
{
#
else
__private
const
int
padding_left
)
{
#
endif
int
out_width_idx
=
get_global_id
(
0
)
;
int
chan_blk_idx
=
get_global_id
(
1
)
;
...
...
@@ -121,7 +119,12 @@ __kernel void winograd_transform_2x2(__read_only image2d_t input,
}
}
__kernel void winograd_inverse_transform_2x2(__read_only image2d_t input,
__kernel void winograd_inverse_transform_2x2(
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const int global_size_dim0,
__private const int global_size_dim1,
#endif
__read_only image2d_t input,
#ifdef BIAS
__read_only image2d_t bias, /* cout%4 * cout/4 */
#endif
...
...
@@ -130,14 +133,7 @@ __kernel void winograd_inverse_transform_2x2(__read_only image2d_t input,
__private const int out_width,
__private const int round_hw,
__private const int round_w,
#ifndef USE_QUALCOMM_OPENCL_2_0
__private const float relux_max_limit,
__private const int global_size_dim0,
__private const int global_size_dim1) {
#else
__private const float relux_max_limit) {
#endif
const int width_idx = get_global_id(0);
const int height_idx = get_global_id(1);
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
eefc9d01
...
...
@@ -56,6 +56,11 @@ static void Concat2(cl::Kernel *kernel,
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
(
*
is_non_uniform_work_groups_supported
))
{
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
}
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input0
->
opencl_image
())));
kernel
->
setArg
(
idx
++
,
...
...
@@ -63,9 +68,6 @@ static void Concat2(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input0
->
dim
(
3
)));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
opencl_image
())));
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
*
prev_input_shape
=
input0
->
shape
();
...
...
@@ -119,12 +121,14 @@ static void ConcatN(cl::Kernel *kernel,
};
uint32_t
idx
=
0
;
if
(
!
(
*
is_non_uniform_work_groups_supported
))
{
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
}
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
chan_blk_offset
));
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
chan_blk_offset
+=
input_channel_blk
;
*
kwg_size
=
...
...
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
eefc9d01
...
...
@@ -84,6 +84,11 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
(
*
is_non_uniform_work_groups_supported
))
{
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
}
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
...
...
@@ -98,9 +103,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel
->
setArg
(
idx
++
,
stride
);
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
*
prev_input_shape
=
input
->
shape
();
...
...
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
eefc9d01
...
...
@@ -79,6 +79,11 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
(
*
is_non_uniform_work_groups_supported
))
{
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
}
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
...
...
@@ -96,9 +101,6 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
kernel
->
setArg
(
idx
++
,
dilations
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
*
prev_input_shape
=
input
->
shape
();
...
...
mace/kernels/opencl/conv_2d_opencl_general.cc
浏览文件 @
eefc9d01
...
...
@@ -79,6 +79,11 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
(
*
is_non_uniform_work_groups_supported
))
{
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
}
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
...
...
@@ -98,9 +103,6 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
kernel
->
setArg
(
idx
++
,
dilations
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
*
prev_input_shape
=
input
->
shape
();
...
...
mace/kernels/opencl/depth_to_space_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -68,12 +68,6 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
uint32_t
gws
[
3
];
std
::
stringstream
ss
;
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
block_size_
);
kernel_
.
setArg
(
idx
++
,
depth_blocks
);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
if
(
d2s_
)
{
gws
[
0
]
=
static_cast
<
uint32_t
>
(
depth_blocks
);
gws
[
1
]
=
static_cast
<
uint32_t
>
(
output_width
);
...
...
@@ -88,9 +82,16 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
<<
input
->
dim
(
1
)
<<
"_"
<<
input
->
dim
(
2
)
<<
"_"
<<
input
->
dim
(
3
);
}
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
block_size_
);
kernel_
.
setArg
(
idx
++
,
depth_blocks
);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
...
...
mace/kernels/opencl/depthwise_conv_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -97,6 +97,11 @@ void DepthwiseConv2d(cl::Kernel *kernel,
input_channels
);
uint32_t
idx
=
0
;
if
(
!
(
*
is_non_uniform_work_groups_supported
))
{
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
}
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
...
...
@@ -117,9 +122,6 @@ void DepthwiseConv2d(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_t
>
(
dilations
[
0
]));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_t
>
(
dilations
[
1
]));
}
kernel
->
setArg
(
idx
++
,
gws
[
0
]);
kernel
->
setArg
(
idx
++
,
gws
[
1
]);
kernel
->
setArg
(
idx
++
,
gws
[
2
]);
*
prev_input_shape
=
input
->
shape
();
...
...
mace/kernels/opencl/eltwise_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -47,6 +47,10 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
}
if
(
!
IsVecEqual
(
input_shape_
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input0
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input1
->
opencl_image
()));
if
(
!
coeff_
.
empty
())
{
...
...
@@ -54,8 +58,6 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
kernel_
.
setArg
(
idx
++
,
coeff_
[
1
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
input_shape_
=
input0
->
shape
();
...
...
mace/kernels/opencl/matmul.cc
浏览文件 @
eefc9d01
...
...
@@ -48,6 +48,10 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
kernel_
=
runtime
->
BuildKernel
(
"matmul"
,
kernel_name
,
built_options
);
}
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
A
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
B
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
C
->
opencl_image
()));
...
...
@@ -56,8 +60,6 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
A
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
A
->
dim
(
2
))));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kwg_size_
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel_
));
...
...
mace/kernels/opencl/pooling_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -81,6 +81,11 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
};
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
2
)));
...
...
@@ -90,14 +95,23 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_
.
setArg
(
idx
++
,
strides_
[
0
]);
kernel_
.
setArg
(
idx
++
,
kernels_
[
0
]);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
input
->
shape
();
kwg_size_
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel_
));
}
else
{
index_t
batch
=
output
->
dim
(
0
);
index_t
out_height
=
output
->
dim
(
1
);
index_t
out_width
=
output
->
dim
(
2
);
index_t
channels
=
output
->
dim
(
3
);
index_t
channel_blocks
=
(
channels
+
3
)
/
4
;
gws
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
batch
*
out_height
),
};
}
std
::
vector
<
uint32_t
>
lws
=
{
8
,
kwg_size_
/
64
,
8
,
1
};
...
...
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -60,6 +60,11 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
height_scale
);
...
...
@@ -67,9 +72,6 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out_height
));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
input
->
shape
();
...
...
mace/kernels/opencl/slice.cc
浏览文件 @
eefc9d01
...
...
@@ -65,12 +65,14 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(
<<
outputs_count
;
for
(
int
i
=
0
;
i
<
outputs_count
;
++
i
)
{
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
channel_blk
*
i
));
kernel_
.
setArg
(
idx
++
,
*
(
output_list
[
i
]
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
...
...
mace/kernels/opencl/softmax_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -45,13 +45,15 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
}
if
(
!
IsVecEqual
(
input_shape_
,
logits
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
logits
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
channels
));
kernel_
.
setArg
(
idx
++
,
remain_channels
);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
logits
->
shape
();
...
...
mace/kernels/opencl/space_to_batch_opencl.cc
浏览文件 @
eefc9d01
...
...
@@ -57,6 +57,11 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
}
if
(
!
IsVecEqual
(
space_shape_
,
space_tensor
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
}
if
(
b2s_
)
{
kernel_
.
setArg
(
idx
++
,
*
(
batch_tensor
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
space_tensor
->
opencl_image
()));
...
...
@@ -72,9 +77,6 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
space_tensor
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch_tensor
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch_tensor
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
space_shape_
=
space_tensor
->
shape
();
...
...
mace/kernels/opencl/winograd_transform.cc
浏览文件 @
eefc9d01
...
...
@@ -61,6 +61,10 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
input_tensor
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output_tensor
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
input_tensor
->
dim
(
1
)));
...
...
@@ -70,8 +74,6 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
0
]
/
2
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
1
]
/
2
));
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
input_shape_
=
input_tensor
->
shape
();
...
...
@@ -151,6 +153,10 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
const
uint32_t
round_h
=
(
height_
+
1
)
/
2
;
const
uint32_t
round_w
=
(
width_
+
1
)
/
2
;
uint32_t
idx
=
0
;
if
(
!
is_non_uniform_work_groups_supported_
)
{
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input_tensor
->
opencl_image
())));
...
...
@@ -165,8 +171,6 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_h
*
round_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
input_shape_
=
input_tensor
->
shape
();
...
...
tools/mace_tools.py
浏览文件 @
eefc9d01
...
...
@@ -376,7 +376,8 @@ def main(unused_args):
build_run_throughput_test
(
target_soc
,
FLAGS
.
run_seconds
,
merged_lib_file
,
FLAGS
.
output_dir
)
packaging_lib_file
(
FLAGS
.
output_dir
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
packaging_lib_file
(
FLAGS
.
output_dir
)
if
__name__
==
"__main__"
:
...
...
tools/packaging_lib.sh
浏览文件 @
eefc9d01
...
...
@@ -14,8 +14,13 @@ source ${CURRENT_DIR}/env.sh
LIBMACE_BUILD_DIR
=
$1
TAR_PACKAGE_NAME
=
libmace_
${
PROJECT_NAME
}
.tar.gz
pushd
$LIBMACE_BUILD_DIR
/
$PROJECT_NAME
ls
|
grep
-v
build | xargs
tar
cvzf libmace_
${
PROJECT_NAME
}
.tar.gz
if
[
-f
$TAR_PACKAGE_NAME
]
;
then
rm
-f
$TAR_PACKAGE_NAME
fi
ls
|
grep
-v
build | xargs
tar
cvzf
$TAR_PACKAGE_NAME
popd
echo
"Packaging done!"
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录