Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
项目经理老王
Mace
提交
3604f69e
Mace
项目概览
项目经理老王
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3604f69e
编写于
8月 22, 2018
作者:
赵
赵奇可
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix code style, delete unused comment
上级
d34eb9c2
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
47 addition
and
83 deletion
+47
-83
mace/kernels/opencl/cl/resize_bicubic.cl
mace/kernels/opencl/cl/resize_bicubic.cl
+15
-42
mace/kernels/opencl/resize_bicubic.cc
mace/kernels/opencl/resize_bicubic.cc
+1
-8
mace/kernels/resize_bicubic.h
mace/kernels/resize_bicubic.h
+14
-18
mace/ops/resize_bicubic_benchmark.cc
mace/ops/resize_bicubic_benchmark.cc
+9
-9
mace/ops/resize_bicubic_test.cc
mace/ops/resize_bicubic_test.cc
+8
-6
未找到文件。
mace/kernels/opencl/cl/resize_bicubic.cl
浏览文件 @
3604f69e
...
...
@@ -4,7 +4,6 @@
const
int
kTableSize
=
(
1
<<
10
)
;
inline
float
ComputeCoeffs
(
int
i
)
{
//
const
int
kTableSize
=
(
1
<<
10
)
;
const
float
A
=
-0.75
;
float
x
=
(
i
/
2
)
*
1.0
/
kTableSize
;
if
(
i
%
2
==
0
)
{
...
...
@@ -18,39 +17,11 @@ inline float ComputeCoeffs(int i) {
}
}
//#define
GET_COEFFS
(
coeffs_tab,
i
)
coeffs_tab[i]
//float
getCoeffs
(
const
float
*coeffs_tab,
int
i
)
{
//
return
coeffs_tab[i]
;
//}
#
define
BOUND
(
val,
limit
)
min
(
limit
-
1
,
max
(
0
,
val
))
//int
Bound
(
int
val,
int
limit
)
{
//
return
min
(
limit
-
1
,
max
(
0
,
val
))
;
//}
//float4
GetWeights
(
const
float*
coeffs_tab,
float
scale,
int
out_loc,
int
limit
)
{
//
const
int
in_loc
=
scale
*
out_loc
;
//
const
float
delta
=
scale
*
out_loc
-
in_loc
;
//
const
int
offset
=
delta
*
kTableSize
+
0.5
; //lrintf not found in opencl;
//
float4
weights
=
{getCoeffs
(
coeffs_tab,
offset
*
2
+
1
)
,
//
getCoeffs
(
coeffs_tab,
offset
*
2
)
,
//
getCoeffs
(
coeffs_tab,
(
kTableSize
-
offset
)
*
2
)
,
//
getCoeffs
(
coeffs_tab,
(
kTableSize
-
offset
)
*
2
+
1
)
}
;
//
return
weights
;
//}
//
//int4
GetIndices
(
float
scale,
int
out_loc,
int
limit
)
{
//
const
int
in_loc
=
scale
*
out_loc
;
//
const
float
delta
=
scale
*
out_loc
-
in_loc
;
//
const
int
offset
=
delta
*
kTableSize
+
0.5
; //lrintf not found in opencl
//
int4
indices
=
{Bound
(
in_loc
-
1
,
limit
)
,
Bound
(
in_loc,
limit
)
,
//
Bound
(
in_loc
+
1
,
limit
)
,
Bound
(
in_loc
+
2
,
limit
)
}
;
//
return
indices
;
//}
__kernel
void
resize_bicubic_nocache
(
KERNEL_ERROR_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__read_only
image2d_t
input,
__write_only
image2d_t
output,
__private
const
float
height_scale,
__private
const
float
width_scale,
...
...
@@ -83,27 +54,30 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
const
int
in_w_offset
=
mul24
(
ch_blk,
in_width
)
;
const
int
in_h_offset
=
mul24
(
b,
in_height
)
;
//begin
resize
bicubic
const
int
h_in_loc
=
height_scale
*
h
;
const
float
h_delta
=
height_scale
*
h
-
h_in_loc
;
const
int
h_offset
=
h_delta
*
kTableSize
+
0.5
;
//lrintf not found in opencl;
const
int
h_offset
=
h_delta
*
kTableSize
+
0.5
;
const
int
w_in_loc
=
width_scale
*
w
;
const
float
w_delta
=
width_scale
*
w
-
w_in_loc
;
const
int
w_offset
=
w_delta
*
kTableSize
+
0.5
;
//lrintf not found in opencl;
const
int
w_offset
=
w_delta
*
kTableSize
+
0.5
;
float4
y_weights
=
{ComputeCoeffs
(
h_offset
*
2
+
1
)
,
ComputeCoeffs
(
h_offset
*
2
)
,
ComputeCoeffs
((
kTableSize
-
h_offset
)
*
2
)
,
ComputeCoeffs
((
kTableSize
-
h_offset
)
*
2
+
1
)
}
;
int4
y_indices
=
{BOUND
(
h_in_loc
-
1
,
in_height
)
,
BOUND
(
h_in_loc,
in_height
)
,
BOUND
(
h_in_loc
+
1
,
in_height
)
,
BOUND
(
h_in_loc
+
2
,
in_height
)
}
;
int4
y_indices
=
{BOUND
(
h_in_loc
-
1
,
in_height
)
,
BOUND
(
h_in_loc,
in_height
)
,
BOUND
(
h_in_loc
+
1
,
in_height
)
,
BOUND
(
h_in_loc
+
2
,
in_height
)
}
;
float4
x_weights
=
{ComputeCoeffs
(
w_offset
*
2
+
1
)
,
ComputeCoeffs
(
w_offset
*
2
)
,
ComputeCoeffs
((
kTableSize
-
w_offset
)
*
2
)
,
ComputeCoeffs
((
kTableSize
-
w_offset
)
*
2
+
1
)
}
;
int4
x_indices
=
{BOUND
(
w_in_loc
-
1
,
in_width
)
,
BOUND
(
w_in_loc,
in_width
)
,
BOUND
(
w_in_loc
+
1
,
in_width
)
,
BOUND
(
w_in_loc
+
2
,
in_width
)
}
;
int4
x_indices
=
{BOUND
(
w_in_loc
-
1
,
in_width
)
,
BOUND
(
w_in_loc,
in_width
)
,
BOUND
(
w_in_loc
+
1
,
in_width
)
,
BOUND
(
w_in_loc
+
2
,
in_width
)
}
;
float4
coeffs0
=
{0,
0
,
0
,
0}
;
float4
coeffs1
=
{0,
0
,
0
,
0}
;
...
...
@@ -115,13 +89,13 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
if
(
i
==
2
)
{
y_index
=
y_indices.s2
; }
if
(
i
==
3
)
{
y_index
=
y_indices.s3
; }
DATA_TYPE4
data0
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s0,
in_h_offset
+
y_index
))
;
(
int2
)(
in_w_offset
+
x_indices.s0,
in_h_offset
+
y_index
))
;
DATA_TYPE4
data1
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s1,
in_h_offset
+
y_index
))
;
(
int2
)(
in_w_offset
+
x_indices.s1,
in_h_offset
+
y_index
))
;
DATA_TYPE4
data2
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s2,
in_h_offset
+
y_index
))
;
(
int2
)(
in_w_offset
+
x_indices.s2,
in_h_offset
+
y_index
))
;
DATA_TYPE4
data3
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_w_offset
+
x_indices.s3,
in_h_offset
+
y_index
))
;
(
int2
)(
in_w_offset
+
x_indices.s3,
in_h_offset
+
y_index
))
;
float4
xw0
=
{
x_weights.s0,
x_weights.s0,
x_weights.s0,
x_weights.s0
}
;
float4
xw1
=
{
x_weights.s1,
x_weights.s1,
x_weights.s1,
x_weights.s1
}
;
...
...
@@ -150,7 +124,6 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
const
int
out_h_offset
=
mul24
(
b,
out_height
)
;
WRITE_IMAGET
(
output,
(
int2
)(
out_w_offset
+
w,
out_h_offset
+
h
)
,
outdata
)
;
//end
bicubic
}
...
...
mace/kernels/opencl/resize_bicubic.cc
浏览文件 @
3604f69e
...
...
@@ -85,13 +85,7 @@ MaceStatus ResizeBicubicFunctor<DeviceType::GPU, T>::operator()(
kwg_size_
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel_
));
}
// //create buffer
// cl::Buffer coeffs_buf(runtime->context(), CL_MEM_READ_WRITE, (kTableSize + 1 ) * 2 * sizeof(float));
// if (runtime->command_queue().enqueueWriteBuffer(coeffs_buf,CL_TRUE,0,(kTableSize + 1 ) * 2 * sizeof(float),GetCoeffsTable()) != CL_SUCCESS) {
// std::cout << "Failed to write memory;n";
// exit(1);
// }
// //end create buffer
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
std
::
vector
<
index_t
>
output_shape
{
batch
,
out_height
,
out_width
,
channels
};
...
...
@@ -116,7 +110,6 @@ MaceStatus ResizeBicubicFunctor<DeviceType::GPU, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out_height
));
// kernel_.setArg(idx++, coeffs_buf);
input_shape_
=
input
->
shape
();
}
...
...
mace/kernels/resize_bicubic.h
浏览文件 @
3604f69e
...
...
@@ -95,12 +95,6 @@ inline void ResizeImage(const float *images,
const
float
height_scale
,
const
float
width_scale
,
float
*
output
)
{
// std::stringstream tabss;
// for(int tabi = 0; tabi < 10; tabi++){
// tabss << GetCoeffsTable()[tabi] << " ";
// }
// LOG(WARNING) << tabss.str().c_str();
std
::
array
<
float
,
4
>
coeff
=
{{
0.0
,
0.0
,
0.0
,
0.0
}};
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
batch_size
;
++
b
)
{
...
...
@@ -119,22 +113,24 @@ inline void ResizeImage(const float *images,
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
// Use a 4x4 patch to compute the interpolated output value at
// (b, y, x, c).
const
float
*
channel_input_ptr
=
images
+
(
b
*
channels
+
c
)
*
in_height
*
in_width
;
float
*
channel_output_ptr
=
output
+
(
b
*
channels
+
c
)
*
out_height
*
out_width
;
const
float
*
channel_input_ptr
=
images
+
(
b
*
channels
+
c
)
*
in_height
*
in_width
;
float
*
channel_output_ptr
=
output
+
(
b
*
channels
+
c
)
*
out_height
*
out_width
;
for
(
index_t
i
=
0
;
i
<
4
;
++
i
)
{
const
std
::
array
<
float
,
4
>
values
=
{
{
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
0
]]),
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
1
]]),
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
2
]]),
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
3
]])}};
{
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
0
]]),
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
1
]]),
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
2
]]),
static_cast
<
float
>
(
channel_input_ptr
[
y_indices
[
i
]
*
in_width
+
x_indices
[
3
]])}};
coeff
[
i
]
=
Interpolate1D
(
x_weights
,
values
);
}
channel_output_ptr
[
y
*
out_width
+
x
]
=
Interpolate1D
(
y_weights
,
coeff
);
channel_output_ptr
[
y
*
out_width
+
x
]
=
Interpolate1D
(
y_weights
,
coeff
);
}
}
}
...
...
mace/ops/resize_bicubic_benchmark.cc
浏览文件 @
3604f69e
...
...
@@ -83,17 +83,17 @@ void ResizeBicubicBenchmark(int iters,
}
// namespace
#define MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, TYPE, DEVICE) \
static void
\
static void \
MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE(
\
int iters) {
\
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3;
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0;
\
mace::testing::MaccProcessed(macc);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
##DEVICE( \
int iters) { \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ResizeBicubicBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \
}
\
MACE_BENCHMARK(
\
} \
MACE_BENCHMARK( \
MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE)
...
...
mace/ops/resize_bicubic_test.cc
浏览文件 @
3604f69e
...
...
@@ -48,7 +48,6 @@ TEST_F(ResizeBicubicTest, CPUResizeBicubicWOAlignCorners) {
NHWC
);
// Check
//TODO change expected data
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
0
,
1
,
2
,
6
,
7
,
8
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
...
...
@@ -63,7 +62,8 @@ TEST_F(ResizeBicubicTest, CPUResizeBicubicWOAlignCorners1) {
std
::
vector
<
float
>
input
(
48
);
std
::
iota
(
begin
(
input
),
end
(
input
),
0
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
4
,
4
,
3
},
input
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"ResizeBicubic"
,
"ResizeBicubicTest"
)
.
Input
(
"InputNCHW"
)
...
...
@@ -73,11 +73,14 @@ TEST_F(ResizeBicubicTest, CPUResizeBicubicWOAlignCorners1) {
// Run
net
.
RunOp
();
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
// Check
//TODO change expected data
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
3
},
{
0.
,
1.
,
2.
,
4.110297
,
5.110297
,
6.110297
,
8.223037
,
9.223036
,
10.223037
,
24.
,
25.
,
26.
,
28.110298
,
29.1103
,
30.110298
,
32.223038
,
33.223038
,
34.223038
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
3
},
{
0.
,
1.
,
2.
,
4.110297
,
5.110297
,
6.110297
,
8.223037
,
9.223036
,
10.223037
,
24.
,
25.
,
26.
,
28.110298
,
29.1103
,
30.110298
,
32.223038
,
33.223038
,
34.223038
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
...
...
@@ -167,7 +170,6 @@ void TestRandomResizeBicubic() {
// Check
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"DeviceOutput"
),
1e-5
,
1e-4
);
}
}
}
// namespace
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录