Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
83017e2b
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
83017e2b
编写于
5月 30, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Optimize convolution OpenCL kernel: remove unused select.
上级
4aa83602
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
25 additions
and
15 deletions
+25
-15
mace/kernels/opencl/cl/conv_2d.cl
mace/kernels/opencl/cl/conv_2d.cl
+13
-7
mace/kernels/opencl/cl/conv_2d_3x3.cl
mace/kernels/opencl/cl/conv_2d_3x3.cl
+12
-8
未找到文件。
mace/kernels/opencl/cl/conv_2d.cl
浏览文件 @
83017e2b
...
...
@@ -53,22 +53,28 @@ __kernel void conv_2d(KERNEL_ERROR_PARAMS
int in_width1 = in_width0 + in_width_stride;
int in_width2 = in_width1 + in_width_stride;
int in_width3 = in_width2 + in_width_stride;
const int height_idx = mad24((out_hb % out_height), stride, -padding_top);
const int height_start = mad24((out_hb % out_height), stride, -padding_top);
int in_height_gap = select(
0,
(-height_start + dilation_h - 1) / dilation_h,
height_start < 0);
int in_height_start = mad24(in_height_gap, dilation_h, height_start);
int in_height_end = min(mad24(filter_height, dilation_h, height_start),
in_height);
const int batch_idx = mul24((out_hb / out_height), in_height);
const int filter_hw = mul24(filter_width, filter_height);
const int filter_y_idx_start = mul24(out_ch_blk, filter_hw)
+ mul24(in_height_gap, filter_width);
DATA_TYPE4 in0, in1, in2, in3;
DATA_TYPE4 weights0, weights1, weights2, weights3;
for (short in_ch_blk = 0; in_ch_blk < in_ch_blks; ++in_ch_blk) {
const int in_idx = mul24(in_ch_blk, in_width);
int filter_x_idx = in_ch_blk << 2;
int filter_y_idx = mul24(out_ch_blk, filter_hw);
for (short hb_idx = 0; hb_idx < filter_height; ++hb_idx) {
int in_hb_value = height_idx + mul24(hb_idx, dilation_h);
in_hb_value = select(in_hb_value + batch_idx,
-1,
(in_hb_value < 0 || in_hb_value >= in_height));
int filter_y_idx = filter_y_idx_start;
for (int hb_idx = in_height_start; hb_idx < in_height_end; hb_idx += dilation_h) {
int in_hb_value = hb_idx + batch_idx;
#pragma unroll
for (short width_idx = 0; width_idx < filter_width; ++width_idx) {
...
...
mace/kernels/opencl/cl/conv_2d_3x3.cl
浏览文件 @
83017e2b
...
...
@@ -54,21 +54,26 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS
int in_width2 = in_width1 + in_width_stride;
int in_width3 = in_width2 + in_width_stride;
int in_width4 = in_width3 + in_width_stride;
const int height_idx = mad24((out_hb % out_height), stride, -padding_top);
const int height_start = mad24((out_hb % out_height), stride, -padding_top);
int in_height_gap = select(
0,
(-height_start + dilation_h - 1) / dilation_h,
height_start < 0);
int in_height_start = mad24(in_height_gap, dilation_h, height_start);
int in_height_end = min(mad24(3, dilation_h, height_start),
in_height);
const int batch_idx = mul24((out_hb / out_height), in_height);
const int filter_y_idx_start = mul24(out_ch_blk, 9) + mul24(in_height_gap, 3);
DATA_TYPE4 in0, in1, in2, in3, in4;
DATA_TYPE4 weights0, weights1, weights2, weights3;
for (short in_ch_blk = 0; in_ch_blk < in_ch_blks; ++in_ch_blk) {
const int in_idx = mul24(in_ch_blk, in_width);
int filter_x_idx = in_ch_blk << 2;
int filter_y_idx = mul24(out_ch_blk, 9);
int in_hb_idx = height_idx;
for (short hb_idx = 0; hb_idx < 3; ++hb_idx) {
int in_hb_value = select(in_hb_idx + batch_idx,
-1,
(in_hb_idx < 0 || in_hb_idx >= in_height));
int filter_y_idx = filter_y_idx_start;
for (int hb_idx = in_height_start; hb_idx < in_height_end; hb_idx += dilation_h) {
int in_hb_value = hb_idx + batch_idx;
int in_width_idx = 0;
for (short width_idx = 0; width_idx < 3; ++width_idx) {
int in_width_value;
...
...
@@ -122,7 +127,6 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS
in_width_idx += dilation_w;
filter_y_idx += 1;
}
in_hb_idx += dilation_h;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录