Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
3a415052
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
3a415052
编写于
4月 24, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'cwise-overflow' into 'master'
Fix cwise and filter b2i write overflow bug. See merge request !414
上级
7457a5cb
6054df4f
变更
5
隐藏空白更改
内联
并排
Showing 5 changed files with 26 additions and 16 deletions
+26
-16
mace/kernels/opencl/cl/conv_2d.cl
mace/kernels/opencl/cl/conv_2d.cl
+1
-1
mace/kernels/opencl/cl/conv_2d_3x3.cl
mace/kernels/opencl/cl/conv_2d_3x3.cl
+1
-1
mace/kernels/opencl/cl/cwise.cl
mace/kernels/opencl/cl/cwise.cl
+20
-12
mace/kernels/opencl/cwise_opencl.cc
mace/kernels/opencl/cwise_opencl.cc
+2
-0
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+2
-2
未找到文件。
mace/kernels/opencl/cl/conv_2d.cl
浏览文件 @
3a415052
...
@@ -87,7 +87,7 @@ __kernel void conv_2d(KERNEL_ERROR_PARAMS
...
@@ -87,7 +87,7 @@ __kernel void conv_2d(KERNEL_ERROR_PARAMS
#undef READ_INPUT
#undef READ_INPUT
// int filter_idx = (hb_idx * filter_width + width_idx) * rounded_in_ch + (in_ch_blk << 2);
// int filter_idx = (hb_idx * filter_width + width_idx) * in_ch + (in_ch_blk << 2);
weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx));
weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx));
weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx));
weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx));
weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx));
weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx));
...
...
mace/kernels/opencl/cl/conv_2d_3x3.cl
浏览文件 @
3a415052
...
@@ -87,7 +87,7 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS
...
@@ -87,7 +87,7 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS
#undef READ_INPUT
#undef READ_INPUT
// int filter_idx = (hb_idx * 3 + width_idx) * rounded_in_ch + (in_ch_blk << 2);
// int filter_idx = (hb_idx * 3 + width_idx) * in_ch + (in_ch_blk << 2);
weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx));
weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx));
weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx));
weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx));
weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx));
weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx));
...
...
mace/kernels/opencl/cl/cwise.cl
浏览文件 @
3a415052
...
@@ -3,6 +3,8 @@
...
@@ -3,6 +3,8 @@
__kernel
void
cwise
(
KERNEL_ERROR_PARAMS
__kernel
void
cwise
(
KERNEL_ERROR_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM2
GLOBAL_WORK_GROUP_SIZE_DIM2
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__private
const
int
width,
__private
const
int
channel,
__private
const
float
value,
__private
const
float
value,
__write_only
image2d_t
output
)
{
__write_only
image2d_t
output
)
{
const
int
w
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
0
)
;
...
@@ -12,6 +14,8 @@ __kernel void cwise(KERNEL_ERROR_PARAMS
...
@@ -12,6 +14,8 @@ __kernel void cwise(KERNEL_ERROR_PARAMS
if (w >= global_size_dim0 || hb >= global_size_dim1) return;
if (w >= global_size_dim0 || hb >= global_size_dim1) return;
#endif
#endif
const int remain_chan = channel - mul24((w / width), 4);
DATA_TYPE4 in0 = READ_IMAGET(input, SAMPLER, (int2)(w, hb));
DATA_TYPE4 in0 = READ_IMAGET(input, SAMPLER, (int2)(w, hb));
DATA_TYPE4 in1 = (DATA_TYPE4){value, value, value, value};
DATA_TYPE4 in1 = (DATA_TYPE4){value, value, value, value};
DATA_TYPE4 out;
DATA_TYPE4 out;
...
@@ -21,15 +25,9 @@ __kernel void cwise(KERNEL_ERROR_PARAMS
...
@@ -21,15 +25,9 @@ __kernel void cwise(KERNEL_ERROR_PARAMS
#elif CWISE_TYPE == 1
#elif CWISE_TYPE == 1
out = in0 + in1;
out = in0 + in1;
#elif CWISE_TYPE == 2
#elif CWISE_TYPE == 2
out.x
=
fmax
(
in0.x,
value
)
;
out = fmax(in0, in1);
out.y
=
fmax
(
in0.y,
value
)
;
out.z
=
fmax
(
in0.z,
value
)
;
out.z
=
fmax
(
in0.w,
value
)
;
#elif CWISE_TYPE == 3
#elif CWISE_TYPE == 3
out.x
=
fmin
(
in0.x,
value
)
;
out = fmin(in0, in1);
out.y
=
fmin
(
in0.y,
value
)
;
out.z
=
fmin
(
in0.z,
value
)
;
out.z
=
fmin
(
in0.w,
value
)
;
#elif CWISE_TYPE == 4
#elif CWISE_TYPE == 4
out = in0 - in1;
out = in0 - in1;
#elif CWISE_TYPE == 5
#elif CWISE_TYPE == 5
...
@@ -38,10 +36,20 @@ __kernel void cwise(KERNEL_ERROR_PARAMS
...
@@ -38,10 +36,20 @@ __kernel void cwise(KERNEL_ERROR_PARAMS
in1 = (DATA_TYPE4)(0, 0, 0, 0);
in1 = (DATA_TYPE4)(0, 0, 0, 0);
out = in1 - in0;
out = in1 - in0;
#elif CWISE_TYPE == 7
#elif CWISE_TYPE == 7
out.x
=
fabs
(
in0.x
)
;
out = fabs(in0);
out.y
=
fabs
(
in0.y
)
;
#endif
out.z
=
fabs
(
in0.z
)
;
out.w
=
fabs
(
in0.w
)
;
#if CWISE_TYPE == 1 || CWISE_TYPE == 2 || CWISE_TYPE == 3 || CWISE_TYPE == 4
if
(
remain_chan
<
4
)
{
switch
(
remain_chan
)
{
case
1:
out.y
=
0
;
case
2:
out.z
=
0
;
case
3:
out.w
=
0
;
}
}
#endif
#endif
WRITE_IMAGET
(
output,
(
int2
)(
w,
hb
)
,
out
)
;
WRITE_IMAGET
(
output,
(
int2
)(
w,
hb
)
,
out
)
;
...
...
mace/kernels/opencl/cwise_opencl.cc
浏览文件 @
3a415052
...
@@ -71,6 +71,8 @@ void CWiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
...
@@ -71,6 +71,8 @@ void CWiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
}
}
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
channels
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
float
>
(
coeff_
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
float
>
(
coeff_
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
input_shape_
=
input
->
shape
();
...
...
mace/kernels/opencl/helper.cc
浏览文件 @
3a415052
...
@@ -34,12 +34,12 @@ void CalInOutputImageShape(const std::vector<index_t> &shape, /* NHWC */
...
@@ -34,12 +34,12 @@ void CalInOutputImageShape(const std::vector<index_t> &shape, /* NHWC */
(
*
image_shape
)[
1
]
=
shape
[
0
]
*
shape
[
1
];
(
*
image_shape
)[
1
]
=
shape
[
0
]
*
shape
[
1
];
}
}
// [RoundUp<4>(Ic), H * W * (Oc + 3) / 4]
// [Ic, H * W * (Oc + 3) / 4]
void
CalConv2dFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HWOI */
void
CalConv2dFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HWOI */
std
::
vector
<
size_t
>
*
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
->
resize
(
2
);
image_shape
->
resize
(
2
);
(
*
image_shape
)[
0
]
=
RoundUp
<
index_t
>
(
shape
[
3
],
4
)
;
(
*
image_shape
)[
0
]
=
shape
[
3
]
;
(
*
image_shape
)[
1
]
=
shape
[
0
]
*
shape
[
1
]
*
RoundUpDiv4
(
shape
[
2
]);
(
*
image_shape
)[
1
]
=
shape
[
0
]
*
shape
[
1
]
*
RoundUpDiv4
(
shape
[
2
]);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录