Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
fbc1d019
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
fbc1d019
编写于
11月 14, 2018
作者:
刘
刘琦
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'fix_wino-4x4_bug' into 'master'
Fix wino 4x4 bug. See merge request !869
上级
6089ec6a
97f38876
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
18 additions
and
35 deletions
+18
-35
mace/ops/opencl/cl/winograd_transform.cl
mace/ops/opencl/cl/winograd_transform.cl
+16
-24
mace/ops/opencl/image/winograd_transform.h
mace/ops/opencl/image/winograd_transform.h
+0
-9
mace/ops/winograd_convolution_test.cc
mace/ops/winograd_convolution_test.cc
+2
-2
未找到文件。
mace/ops/opencl/cl/winograd_transform.cl
浏览文件 @
fbc1d019
...
...
@@ -8,9 +8,7 @@ __kernel void winograd_transform_2x2(OUT_OF_RANGE_PARAMS
__private
const
int
in_width,
__private
const
int
in_channel,
__private
const
int
round_hw,
__private
const
float
round_hw_r,
__private
const
int
round_w,
__private
const
float
round_w_r,
__private
const
int
padding_top,
__private
const
int
padding_left
)
{
int
out_width_idx
=
get_global_id
(
0
)
;
...
...
@@ -23,10 +21,10 @@ __kernel void winograd_transform_2x2(OUT_OF_RANGE_PARAMS
#endif
const int chan_blk_size = global_size_dim1;
const int batch_idx = out_width_idx
* round_hw_r
;
const int t_idx =
mad24(batch_idx, -round_hw, out_width_idx
);
const int n_round_w = t_idx
* round_w_r
;
const int mod_round_w =
mad24(n_round_w, -round_w, t_idx
);
const int batch_idx = out_width_idx
/ round_hw
;
const int t_idx =
out_width_idx - mul24(batch_idx, round_hw
);
const int n_round_w = t_idx
/ round_w
;
const int mod_round_w =
t_idx - mul24(n_round_w, round_w
);
const int height_idx = (n_round_w << 1) - padding_top;
const int width_idx = (mod_round_w << 1) - padding_left;
...
...
@@ -128,9 +126,7 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
__private const int out_height,
__private const int out_width,
__private const int round_hw,
__private const float round_hw_r,
__private const int round_w,
__private const float round_w_r,
__private const float relux_max_limit) {
const int width_idx = get_global_id(0);
const int height_idx = get_global_id(1);
...
...
@@ -145,10 +141,10 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
int width = width_idx;
int height = height_idx;
const int batch = width_idx
* round_hw_r
;
int t =
mad24(batch, -round_hw, width_idx
);
const int n_round_w = t
* round_w_r
;
const int mod_round_w =
mad24(n_round_w, -round_w, t
);
const int batch = width_idx
/ round_hw
;
int t =
width_idx - mul24(batch, round_hw
);
const int n_round_w = t
/ round_w
;
const int mod_round_w =
t - mul24(n_round_w, round_w
);
const int out_height_idx = n_round_w << 1;
const int out_width_idx = mod_round_w << 1;
const int out_chan_idx = height_idx;
...
...
@@ -239,9 +235,7 @@ __kernel void winograd_transform_4x4(OUT_OF_RANGE_PARAMS
__private const int in_width,
__private const int in_channel,
__private const int round_hw,
__private const float round_hw_r,
__private const int round_w,
__private const float round_w_r,
__private const int padding_top,
__private const int padding_left) {
int out_width_idx = get_global_id(0);
...
...
@@ -254,10 +248,10 @@ __kernel void winograd_transform_4x4(OUT_OF_RANGE_PARAMS
#endif
const int chan_blk_size = global_size_dim1;
const int batch_idx = out_width_idx
* round_hw_r
;
const int t_idx =
mad24(batch_idx, -round_hw, out_width_idx
);
const int n_round_w = t_idx
* round_w_r
;
const int mod_round_w =
mad24(n_round_w, -round_w, t_idx
);
const int batch_idx = out_width_idx
/ round_hw
;
const int t_idx =
out_width_idx - mul24(batch_idx, round_hw
);
const int n_round_w = t_idx
/ round_w
;
const int mod_round_w =
t_idx - mul24(n_round_w, round_w
);
const int height_idx = (n_round_w << 2) - padding_top;
const int width_idx = (mod_round_w << 2) - padding_left;
...
...
@@ -400,9 +394,7 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
__private const int out_height,
__private const int out_width,
__private const int round_hw,
__private const float round_hw_r,
__private const int round_w,
__private const float round_w_r,
__private const float relux_max_limit) {
const int width_idx = get_global_id(0);
const int height_idx = get_global_id(1);
...
...
@@ -414,10 +406,10 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
#endif
const int out_channel = global_size_dim1;
const int batch = width_idx
* round_hw_r
;
int h =
mad24(batch, -round_hw, width_idx
);
int n_round_w = h
* round_w_r
;
int mod_round_w =
mad24(n_round_w, -round_w, h
);
const int batch = width_idx
/ round_hw
;
int h =
width_idx - mul24(batch, round_hw
);
int n_round_w = h
/ round_w
;
int mod_round_w =
h - mul24(n_round_w, round_w
);
const int out_height_idx = n_round_w << 2;
const int out_width_idx = mod_round_w << 2;
const int coord_x = mad24(height_idx, out_width, out_width_idx);
...
...
mace/ops/opencl/image/winograd_transform.h
浏览文件 @
fbc1d019
...
...
@@ -118,8 +118,6 @@ MaceStatus WinogradTransformKernel<T>::Compute(
(
output_shape
[
2
]
+
wino_blk_size_
-
1
)
/
wino_blk_size_
;
const
index_t
out_width
=
input_tensor
->
dim
(
0
)
*
round_h
*
round_w
;
const
float
round_hw_r
=
1.
f
/
static_cast
<
float
>
(
round_h
*
round_w
);
const
float
round_w_r
=
1.
f
/
static_cast
<
float
>
(
round_w
);
const
index_t
blk_sqr
=
(
wino_blk_size_
+
2
)
*
(
wino_blk_size_
+
2
);
const
uint32_t
gws
[
2
]
=
{
...
...
@@ -148,9 +146,7 @@ MaceStatus WinogradTransformKernel<T>::Compute(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
input_tensor
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
input_tensor
->
dim
(
3
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_h
*
round_w
));
kernel_
.
setArg
(
idx
++
,
round_hw_r
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
round_w_r
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
0
]
/
2
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
1
]
/
2
));
...
...
@@ -281,9 +277,6 @@ MaceStatus WinogradInverseTransformKernel<T>::Compute(
const
index_t
round_h
=
(
height
+
wino_blk_size_
-
1
)
/
wino_blk_size_
;
const
index_t
round_w
=
(
width
+
wino_blk_size_
-
1
)
/
wino_blk_size_
;
const
float
round_hw_r
=
1.
f
/
static_cast
<
float
>
(
round_h
*
round_w
);
const
float
round_w_r
=
1.
f
/
static_cast
<
float
>
(
round_w
);
uint32_t
idx
=
0
;
MACE_OUT_OF_RANGE_SET_ARGS
(
kernel_
);
MACE_SET_2D_GWS_ARGS
(
kernel_
,
gws
);
...
...
@@ -299,9 +292,7 @@ MaceStatus WinogradInverseTransformKernel<T>::Compute(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
output_shape
[
1
]));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
output_shape
[
2
]));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_h
*
round_w
));
kernel_
.
setArg
(
idx
++
,
round_hw_r
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
round_w_r
);
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
input_shape_
=
input_tensor
->
shape
();
...
...
mace/ops/winograd_convolution_test.cc
浏览文件 @
fbc1d019
...
...
@@ -169,9 +169,9 @@ TEST_F(WinogradConvolutionTest, UnAlignedConvolutionM4) {
}
TEST_F
(
WinogradConvolutionTest
,
BatchConvolutionM4
)
{
WinogradConvolution
<
DeviceType
::
GPU
,
float
>
(
3
,
64
,
64
,
32
,
32
,
WinogradConvolution
<
DeviceType
::
GPU
,
float
>
(
3
,
107
,
113
,
5
,
7
,
Padding
::
VALID
,
4
);
WinogradConvolution
<
DeviceType
::
GPU
,
float
>
(
5
,
61
,
67
,
37
,
31
,
WinogradConvolution
<
DeviceType
::
GPU
,
float
>
(
5
,
107
,
113
,
5
,
7
,
Padding
::
SAME
,
4
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录