Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
9efc54d6
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
9efc54d6
编写于
8月 29, 2018
作者:
赵
赵奇可
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix unefficient code, delete unused code
上级
f0521e5e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
47 addition
and
69 deletion
+47
-69
mace/kernels/opencl/cl/resize_bicubic.cl
mace/kernels/opencl/cl/resize_bicubic.cl
+46
-68
mace/kernels/opencl/resize_bicubic.cc
mace/kernels/opencl/resize_bicubic.cc
+1
-0
mace/kernels/resize_bicubic.h
mace/kernels/resize_bicubic.h
+0
-1
未找到文件。
mace/kernels/opencl/cl/resize_bicubic.cl
浏览文件 @
9efc54d6
#
include
<common.h>
const
int
kTableSize
=
(
1
<<
10
)
;
inline
float
ComputeCoeffs
(
int
i
)
{
const
float
A
=
-0.75
;
float
x
=
(
i
/
2
)
*
1.0
/
kTableSize
;
if
(
i
%
2
==
0
)
{
float
coeff
=
((
A
+
2
)
*
x
-
(
A
+
3
))
*
x
*
x
+
1
;
return
coeff
;
}
else
{
x
+=
1.0
;
float
coeff
=
((
A
*
x
-
5
*
A
)
*
x
+
8
*
A
)
*
x
-
4
*
A
;
return
coeff
;
}
inline
float
coeff_even
(
float
i
)
{
float
x
=
i
/
TABLE_SIZE
;
return
(
1.25f
*
x
-
2.25f
)
*
x
*
x
+
1.0f
;
}
#
define
BOUND
(
val,
limit
)
min
(
limit
-
1
,
max
(
0
,
val
))
inline
float
coeff_odd
(
float
i
)
{
float
x
=
i
/
TABLE_SIZE
+
1.0f
;
return
((
-0.75f
*
x
+
3.75f
)
*
x
-
6.0f
)
*
x
+
3.0f
;
}
__kernel
void
resize_bicubic_nocache
(
KERNEL_ERROR_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
...
...
@@ -27,7 +19,6 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height
)
{
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
...
...
@@ -53,72 +44,59 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
const
int
in_w_offset
=
mul24
(
ch_blk,
in_width
)
;
const
int
in_h_offset
=
mul24
(
b,
in_height
)
;
const
int
h_in_loc
=
height_scale
*
h
;
const
float
h_delta
=
height_scale
*
h
-
h_in_loc
;
const
int
h_offset
=
h_delta
*
kTableSize
+
0.5
;
const
int
w_in_loc
=
width_scale
*
w
;
const
float
w_delta
=
width_scale
*
w
-
w_in_loc
;
const
int
w_offset
=
w_delta
*
kTableSize
+
0.5
;
float4
y_weights
=
{ComputeCoeffs
(
h_offset
*
2
+
1
)
,
ComputeCoeffs
(
h_offset
*
2
)
,
ComputeCoeffs
((
kTableSize
-
h_offset
)
*
2
)
,
ComputeCoeffs
((
kTableSize
-
h_offset
)
*
2
+
1
)
}
;
int4
y_indices
=
{BOUND
(
h_in_loc
-
1
,
in_height
)
,
BOUND
(
h_in_loc,
in_height
)
,
BOUND
(
h_in_loc
+
1
,
in_height
)
,
BOUND
(
h_in_loc
+
2
,
in_height
)
}
;
float4
x_weights
=
{ComputeCoeffs
(
w_offset
*
2
+
1
)
,
ComputeCoeffs
(
w_offset
*
2
)
,
ComputeCoeffs
((
kTableSize
-
w_offset
)
*
2
)
,
ComputeCoeffs
((
kTableSize
-
w_offset
)
*
2
+
1
)
}
;
int4
x_indices
=
{BOUND
(
w_in_loc
-
1
,
in_width
)
,
BOUND
(
w_in_loc,
in_width
)
,
BOUND
(
w_in_loc
+
1
,
in_width
)
,
BOUND
(
w_in_loc
+
2
,
in_width
)
}
;
float4
coeffs0
=
{0,
0
,
0
,
0}
;
float4
coeffs1
=
{0,
0
,
0
,
0}
;
float4
coeffs2
=
{0,
0
,
0
,
0}
;
float4
coeffs3
=
{0,
0
,
0
,
0}
;
const
int
h_in_loc
=
(
int
)
h_in
;
const
float
h_delta
=
h_in
-
h_in_loc
;
const
int
h_offset
=
h_delta
*
TABLE_SIZE
+
0.5f
;
const
int
w_in_loc
=
(
int
)
w_in
;
const
float
w_delta
=
w_in
-
w_in_loc
;
const
int
w_offset
=
w_delta
*
TABLE_SIZE
+
0.5f
;
const
float
h_offset_l
=
h_offset
;
const
float
h_offset_r
=
TABLE_SIZE
-
h_offset_l
;
float4
y_weights
=
{coeff_odd
(
h_offset_l
)
,
coeff_even
(
h_offset_l
)
,
coeff_even
(
h_offset_r
)
,
coeff_odd
(
h_offset_r
)
}
;
int4
y_indices
=
{h_in_loc
-
1
,
h_in_loc,
h_in_loc
+
1
,
h_in_loc
+
2}
;
y_indices
=
min
(
max
(
y_indices,
0
)
,
in_height
-
1
)
;
const
float
w_offset_l
=
w_offset
;
const
float
w_offset_r
=
TABLE_SIZE
-
w_offset_l
;
float4
x_weights
=
{coeff_odd
(
w_offset_l
)
,
coeff_even
(
w_offset_l
)
,
coeff_even
(
w_offset_r
)
,
coeff_odd
(
w_offset_r
)
}
;
int4
x_indices
=
{w_in_loc
-
1
,
w_in_loc,
w_in_loc
+
1
,
w_in_loc
+
2}
;
x_indices
=
min
(
max
(
x_indices,
0
)
,
in_width
-
1
)
;
float4
coeffs0
=
0
,
coeffs1
=
0
,
coeffs2
=
0
,
coeffs3
=
0
;
for
(
int
i
=
0
; i < 4; ++i) {
int
y_index
=
y_indices.s0
;
if
(
i
==
1
)
{
y_index
=
y_indices.s1
; }
if
(
i
==
2
)
{
y_index
=
y_indices.s2
; }
if
(
i
==
3
)
{
y_index
=
y_indices.s3
; }
const
int
in_h_index
=
in_h_offset
+
y_index
;
DATA_TYPE4
data0
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s0,
in_h_
offset
+
y_
index
))
;
(
int2
)(
in_w_offset
+
x_indices.s0,
in_h_index
))
;
DATA_TYPE4
data1
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s1,
in_h_
offset
+
y_
index
))
;
(
int2
)(
in_w_offset
+
x_indices.s1,
in_h_index
))
;
DATA_TYPE4
data2
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s2,
in_h_
offset
+
y_
index
))
;
(
int2
)(
in_w_offset
+
x_indices.s2,
in_h_index
))
;
DATA_TYPE4
data3
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s3,
in_h_offset
+
y_index
))
;
float4
xw0
=
{
x_weights.s0,
x_weights.s0,
x_weights.s0,
x_weights.s0
}
;
float4
xw1
=
{
x_weights.s1,
x_weights.s1,
x_weights.s1,
x_weights.s1
}
;
float4
xw2
=
{
x_weights.s2,
x_weights.s2,
x_weights.s2,
x_weights.s2
}
;
float4
xw3
=
{
x_weights.s3,
x_weights.s3,
x_weights.s3,
x_weights.s3
}
;
float4
res
=
{
0
,
0
,
0
,
0
}
;
res
=
mad
(
xw0,
data0,
res
)
;
res
=
mad
(
xw1,
data1,
res
)
;
res
=
mad
(
xw2,
data2,
res
)
;
res
=
mad
(
xw3,
data3,
res
)
;
(
int2
)(
in_w_offset
+
x_indices.s3,
in_h_index
))
;
float4
res
=
0
;
res
=
mad
(
data0,
x_weights.s0,
res
)
;
res
=
mad
(
data1,
x_weights.s1,
res
)
;
res
=
mad
(
data2,
x_weights.s2,
res
)
;
res
=
mad
(
data3,
x_weights.s3,
res
)
;
if
(
i
==
0
)
{
coeffs0
=
res
; }
if
(
i
==
1
)
{
coeffs1
=
res
; }
if
(
i
==
2
)
{
coeffs2
=
res
; }
if
(
i
==
3
)
{
coeffs3
=
res
; }
}
float4
yw0
=
{
y_weights.s0,
y_weights.s0,
y_weights.s0,
y_weights.s0
}
;
float4
yw1
=
{
y_weights.s1,
y_weights.s1,
y_weights.s1,
y_weights.s1
}
;
float4
yw2
=
{
y_weights.s2,
y_weights.s2,
y_weights.s2,
y_weights.s2
}
;
float4
yw3
=
{
y_weights.s3,
y_weights.s3,
y_weights.s3,
y_weights.s3
}
;
DATA_TYPE4
outdata
=
{
0
,
0
,
0
,
0
}
;
outdata
=
mad
(
yw0,
coeffs0,
outdata
)
;
outdata
=
mad
(
yw1,
coeffs1,
outdata
)
;
outdata
=
mad
(
yw2,
coeffs2,
outdata
)
;
outdata
=
mad
(
yw3,
coeffs3,
outdata
)
;
DATA_TYPE4
outdata
=
0
;
outdata
=
mad
(
coeffs0,
y_weights.s0,
outdata
)
;
outdata
=
mad
(
coeffs1,
y_weights.s1,
outdata
)
;
outdata
=
mad
(
coeffs2,
y_weights.s2,
outdata
)
;
outdata
=
mad
(
coeffs3,
y_weights.s3,
outdata
)
;
const
int
out_w_offset
=
mul24
(
ch_blk,
out_width
)
;
const
int
out_h_offset
=
mul24
(
b,
out_height
)
;
...
...
mace/kernels/opencl/resize_bicubic.cc
浏览文件 @
9efc54d6
...
...
@@ -76,6 +76,7 @@ MaceStatus ResizeBicubicFunctor<DeviceType::GPU, T>::operator()(
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpCompatibleCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpCompatibleCLCMDDt
(
dt
));
built_options
.
emplace
(
MakeString
(
"-DTABLE_SIZE="
,
kTableSize
));
MACE_RETURN_IF_ERROR
(
runtime
->
BuildKernel
(
"resize_bicubic"
,
kernel_name
,
...
...
mace/kernels/resize_bicubic.h
浏览文件 @
9efc54d6
...
...
@@ -103,7 +103,6 @@ inline void ResizeImage(const float *images,
std
::
array
<
index_t
,
4
>
y_indices
;
GetWeightsAndIndices
(
height_scale
,
y
,
in_height
,
&
y_weights
,
&
y_indices
);
std
::
stringstream
ss
;
for
(
index_t
x
=
0
;
x
<
out_width
;
++
x
)
{
std
::
array
<
float
,
4
>
x_weights
;
std
::
array
<
index_t
,
4
>
x_indices
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录