Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
6ffd80bf
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6ffd80bf
编写于
6月 15, 2018
作者:
L
liutuo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix mace-models resnet-V2-50 bug
上级
fea5af4f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
18 additions
and
15 deletions
+18
-15
mace/kernels/deconv_2d.h
mace/kernels/deconv_2d.h
+1
-1
mace/kernels/opencl/cl/reduce_mean.cl
mace/kernels/opencl/cl/reduce_mean.cl
+10
-9
mace/kernels/opencl/reduce_mean_opencl.cc
mace/kernels/opencl/reduce_mean_opencl.cc
+7
-5
未找到文件。
mace/kernels/deconv_2d.h
浏览文件 @
6ffd80bf
...
...
@@ -146,7 +146,7 @@ struct Deconv2dFunctorBase {
static
void
CalcDeconvPaddingAndInputSize
(
const
index_t
*
input_shape
,
// NHWC
const
index_t
*
filter_shape
,
//
HWOI
const
index_t
*
filter_shape
,
//
OIHW
const
int
*
strides
,
Padding
padding
,
const
index_t
*
output_shape
,
...
...
mace/kernels/opencl/cl/reduce_mean.cl
浏览文件 @
6ffd80bf
...
...
@@ -10,9 +10,10 @@ __kernel void reduce_mean(KERNEL_ERROR_PARAMS
__private
const
int
batch,
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
float
i
n_height_r
,
__private
const
float
in_width_r,
__private
const
float
i
mage_size_reciprocal
,
__private
const
float
in_width_r
eciprocal
,
__private
const
int
channel_blocks,
__private
const
float
channel_blocks_reciprocal,
__write_only
image2d_t
output
)
{
const
int
i
=
get_local_id
(
0
)
;
const
int
j
=
get_local_id
(
1
)
;
...
...
@@ -24,23 +25,23 @@ __kernel void reduce_mean(KERNEL_ERROR_PARAMS
#
endif
const
int
dim0_size
=
get_local_size
(
0
)
;
DATA_TYPE4
tmp
=
(
DATA_TYPE4
)
{0,
0
,
0
,
0}
;
const
int
index
=
j
*
dim0_size
+
i
;
const
int
b
=
k
/
channel_blocks
;
const
int
ch
=
k
-
b
*
channel_blocks
;
const
int
index
=
mad24
(
j,
dim0_size,
i
)
;
const
int
b
=
floor
(
k
*
channel_blocks_reciprocal
)
;
const
int
ch
=
mad24
(
b,
-channel_blocks,
k
)
;
DATA_TYPE4
in
;
const
int
valid_part_len
=
select
(
partial_len,
partial_len
-
1
,
remain_index
>
0
&&
index
>=
remain_index
)
;
const
int
full_offset
=
index
*
partial_len
;
const
int
full_offset
=
mul24
(
index,
partial_len
)
;
const
int
base_offset
=
select
(
full_offset,
full_offset
-
(
index
-
remain_index
)
,
valid_part_len
<
partial_len
)
;
#
pragma
unroll
for
(
int
l
=
0
; l < valid_part_len; ++l) {
int
offset
=
base_offset
+
l
;
int
h_id
=
floor
(
offset
*
in_width_r
)
;
int
w_id
=
offset
-
h_id
*
in_width
;
int
h_id
=
floor
(
offset
*
in_width_r
eciprocal
)
;
int
w_id
=
mad24
(
h_id,
-in_width,
offset
)
;
int
pos_x
=
mad24
(
ch,
in_width,
w_id
)
;
int
pos_y
=
mad24
(
b,
in_height,
h_id
)
;
in
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
pos_x,
pos_y
))
;
...
...
@@ -58,7 +59,7 @@ __kernel void reduce_mean(KERNEL_ERROR_PARAMS
for
(
int
l
=
0
; l < group_size; ++l) {
out
=
out
+
group_sum[l]
;
}
out
=
out
*
i
n_height_r
*
in_width_r
;
out
=
out
*
i
mage_size_reciprocal
;
WRITE_IMAGET
(
output,
(
int2
)(
ch,
b
)
,
out
)
;
}
}
mace/kernels/opencl/reduce_mean_opencl.cc
浏览文件 @
6ffd80bf
...
...
@@ -17,7 +17,7 @@ MaceStatus ReduceMeanFunctor<DeviceType::GPU, T>::operator()(
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_CHECK_NOTNULL
(
input
);
MACE_CHECK
(
keep_dims_
,
"reduce mean gpu only support keep dims."
);
//
MACE_CHECK(keep_dims_, "reduce mean gpu only support keep dims.");
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"reduce mean gpu only support 4-dim input"
);
MACE_CHECK
(
axis_
.
size
()
==
2
&&
axis_
[
0
]
==
1
&&
axis_
[
1
]
==
2
,
...
...
@@ -83,8 +83,9 @@ MaceStatus ReduceMeanFunctor<DeviceType::GPU, T>::operator()(
const
int
group_size
=
lws
[
0
]
*
lws
[
1
]
*
lws
[
2
];
const
int
partial_len
=
(
image_size
+
group_size
-
1
)
/
group_size
;
const
int
remain_index
=
image_size
%
group_size
;
const
float
in_width_r
=
1.
f
/
in_width
;
const
float
in_height_r
=
1.
f
/
in_height
;
const
float
in_width_reciprocal
=
1.
f
/
in_width
;
const
float
img_size_reciprocal
=
1.
f
/
(
in_width
*
in_height
);
const
float
channel_blk_reciprocal
=
1.
f
/
channel_blocks
;
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
...
...
@@ -106,9 +107,10 @@ MaceStatus ReduceMeanFunctor<DeviceType::GPU, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_width
));
kernel_
.
setArg
(
idx
++
,
i
n_height_r
);
kernel_
.
setArg
(
idx
++
,
in_width_r
);
kernel_
.
setArg
(
idx
++
,
i
mg_size_reciprocal
);
kernel_
.
setArg
(
idx
++
,
in_width_r
eciprocal
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
channel_blocks
));
kernel_
.
setArg
(
idx
++
,
channel_blk_reciprocal
);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录