Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
2944863e
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
2944863e
编写于
3月 19, 2018
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
差异文件
merge with origin master, fix mace/ops code format
上级
5ba3dd5b
feb234de
变更
138
隐藏空白更改
内联
并排
Showing
138 changed file
with
917 addition
and
387 deletion
+917
-387
mace/core/operator.cc
mace/core/operator.cc
+29
-25
mace/kernels/activation.h
mace/kernels/activation.h
+1
-0
mace/kernels/addn.h
mace/kernels/addn.h
+1
-0
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+1
-0
mace/kernels/bias_add.h
mace/kernels/bias_add.h
+1
-0
mace/kernels/channel_shuffle.h
mace/kernels/channel_shuffle.h
+1
-0
mace/kernels/concat.h
mace/kernels/concat.h
+1
-0
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+1
-0
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+1
-0
mace/kernels/eltwise.h
mace/kernels/eltwise.h
+1
-0
mace/kernels/fully_connected.h
mace/kernels/fully_connected.h
+1
-0
mace/kernels/opencl/activation_opencl.cc
mace/kernels/opencl/activation_opencl.cc
+5
-0
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+15
-9
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+4
-1
mace/kernels/opencl/bias_add_opencl.cc
mace/kernels/opencl/bias_add_opencl.cc
+3
-0
mace/kernels/opencl/channel_shuffle.cc
mace/kernels/opencl/channel_shuffle.cc
+8
-4
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+5
-1
mace/kernels/opencl/conv_2d_opencl.cc
mace/kernels/opencl/conv_2d_opencl.cc
+7
-4
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+5
-0
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+5
-1
mace/kernels/opencl/conv_2d_opencl_general.cc
mace/kernels/opencl/conv_2d_opencl_general.cc
+5
-1
mace/kernels/opencl/depthwise_conv_opencl.cc
mace/kernels/opencl/depthwise_conv_opencl.cc
+16
-15
mace/kernels/opencl/eltwise_opencl.cc
mace/kernels/opencl/eltwise_opencl.cc
+3
-0
mace/kernels/opencl/fully_connected_opencl.cc
mace/kernels/opencl/fully_connected_opencl.cc
+20
-7
mace/kernels/opencl/helper.h
mace/kernels/opencl/helper.h
+7
-0
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+9
-10
mace/kernels/opencl/pooling_opencl.cc
mace/kernels/opencl/pooling_opencl.cc
+32
-26
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+15
-10
mace/kernels/opencl/softmax_opencl.cc
mace/kernels/opencl/softmax_opencl.cc
+3
-0
mace/kernels/opencl/space_to_batch_opencl.cc
mace/kernels/opencl/space_to_batch_opencl.cc
+4
-0
mace/kernels/opencl/winograd_transform.cc
mace/kernels/opencl/winograd_transform.cc
+27
-19
mace/kernels/pooling.h
mace/kernels/pooling.h
+1
-0
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+1
-0
mace/kernels/softmax.h
mace/kernels/softmax.h
+1
-0
mace/kernels/space_to_batch.h
mace/kernels/space_to_batch.h
+1
-0
mace/kernels/winograd_transform.h
mace/kernels/winograd_transform.h
+2
-0
mace/ops/activation.cc
mace/ops/activation.cc
+2
-0
mace/ops/activation.h
mace/ops/activation.h
+4
-0
mace/ops/activation_benchmark.cc
mace/ops/activation_benchmark.cc
+6
-0
mace/ops/activation_test.cc
mace/ops/activation_test.cc
+4
-0
mace/ops/addn.cc
mace/ops/addn.cc
+2
-0
mace/ops/addn.h
mace/ops/addn.h
+2
-0
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+6
-0
mace/ops/addn_test.cc
mace/ops/addn_test.cc
+10
-6
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+2
-0
mace/ops/batch_norm.h
mace/ops/batch_norm.h
+2
-0
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+5
-0
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+18
-17
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+2
-0
mace/ops/batch_to_space.h
mace/ops/batch_to_space.h
+6
-3
mace/ops/batch_to_space_benchmark.cc
mace/ops/batch_to_space_benchmark.cc
+6
-0
mace/ops/bias_add.cc
mace/ops/bias_add.cc
+2
-0
mace/ops/bias_add.h
mace/ops/bias_add.h
+5
-3
mace/ops/bias_add_benchmark.cc
mace/ops/bias_add_benchmark.cc
+6
-0
mace/ops/bias_add_test.cc
mace/ops/bias_add_test.cc
+16
-13
mace/ops/buffer_to_image.cc
mace/ops/buffer_to_image.cc
+2
-0
mace/ops/buffer_to_image.h
mace/ops/buffer_to_image.h
+2
-0
mace/ops/buffer_to_image_test.cc
mace/ops/buffer_to_image_test.cc
+7
-1
mace/ops/channel_shuffle.cc
mace/ops/channel_shuffle.cc
+2
-0
mace/ops/channel_shuffle.h
mace/ops/channel_shuffle.h
+2
-0
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+25
-20
mace/ops/channel_shuffle_test.cc
mace/ops/channel_shuffle_test.cc
+20
-13
mace/ops/concat.cc
mace/ops/concat.cc
+2
-0
mace/ops/concat.h
mace/ops/concat.h
+5
-0
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+5
-0
mace/ops/concat_test.cc
mace/ops/concat_test.cc
+15
-6
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+2
-0
mace/ops/conv_2d.h
mace/ops/conv_2d.h
+2
-0
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+30
-24
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+14
-7
mace/ops/conv_pool_2d_base.h
mace/ops/conv_pool_2d_base.h
+4
-0
mace/ops/core_test.cc
mace/ops/core_test.cc
+4
-0
mace/ops/depthwise_conv2d.cc
mace/ops/depthwise_conv2d.cc
+2
-0
mace/ops/depthwise_conv2d.h
mace/ops/depthwise_conv2d.h
+3
-0
mace/ops/depthwise_conv2d_benchmark.cc
mace/ops/depthwise_conv2d_benchmark.cc
+31
-25
mace/ops/depthwise_conv2d_test.cc
mace/ops/depthwise_conv2d_test.cc
+9
-8
mace/ops/eltwise.cc
mace/ops/eltwise.cc
+2
-0
mace/ops/eltwise.h
mace/ops/eltwise.h
+5
-3
mace/ops/eltwise_benchmark.cc
mace/ops/eltwise_benchmark.cc
+7
-1
mace/ops/eltwise_test.cc
mace/ops/eltwise_test.cc
+4
-0
mace/ops/folded_batch_norm.cc
mace/ops/folded_batch_norm.cc
+2
-0
mace/ops/folded_batch_norm.h
mace/ops/folded_batch_norm.h
+4
-0
mace/ops/folded_batch_norm_test.cc
mace/ops/folded_batch_norm_test.cc
+23
-22
mace/ops/fully_connected.cc
mace/ops/fully_connected.cc
+2
-0
mace/ops/fully_connected.h
mace/ops/fully_connected.h
+2
-0
mace/ops/fully_connected_benchmark.cc
mace/ops/fully_connected_benchmark.cc
+6
-0
mace/ops/fully_connected_test.cc
mace/ops/fully_connected_test.cc
+5
-0
mace/ops/fused_conv_2d.cc
mace/ops/fused_conv_2d.cc
+2
-0
mace/ops/fused_conv_2d.h
mace/ops/fused_conv_2d.h
+2
-0
mace/ops/fused_conv_2d_test.cc
mace/ops/fused_conv_2d_test.cc
+6
-4
mace/ops/global_avg_pooling.cc
mace/ops/global_avg_pooling.cc
+2
-0
mace/ops/global_avg_pooling.h
mace/ops/global_avg_pooling.h
+2
-0
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+4
-2
mace/ops/global_avg_pooling_test.cc
mace/ops/global_avg_pooling_test.cc
+4
-0
mace/ops/image_to_buffer.cc
mace/ops/image_to_buffer.cc
+2
-0
mace/ops/image_to_buffer.h
mace/ops/image_to_buffer.h
+3
-0
mace/ops/matmul.cc
mace/ops/matmul.cc
+2
-0
mace/ops/matmul.h
mace/ops/matmul.h
+2
-0
mace/ops/matmul_benchmark.cc
mace/ops/matmul_benchmark.cc
+7
-0
mace/ops/matmul_test.cc
mace/ops/matmul_test.cc
+6
-0
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+4
-0
mace/ops/pooling.cc
mace/ops/pooling.cc
+2
-0
mace/ops/pooling.h
mace/ops/pooling.h
+2
-0
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+4
-2
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+4
-0
mace/ops/reshape.cc
mace/ops/reshape.cc
+2
-0
mace/ops/reshape.h
mace/ops/reshape.h
+2
-0
mace/ops/reshape_test.cc
mace/ops/reshape_test.cc
+4
-0
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+2
-0
mace/ops/resize_bilinear.h
mace/ops/resize_bilinear.h
+2
-0
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+5
-0
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+7
-4
mace/ops/slice.cc
mace/ops/slice.cc
+2
-0
mace/ops/slice.h
mace/ops/slice.h
+3
-0
mace/ops/slice_benchmark.cc
mace/ops/slice_benchmark.cc
+5
-1
mace/ops/slice_test.cc
mace/ops/slice_test.cc
+6
-2
mace/ops/softmax.cc
mace/ops/softmax.cc
+2
-0
mace/ops/softmax.h
mace/ops/softmax.h
+2
-0
mace/ops/softmax_benchmark.cc
mace/ops/softmax_benchmark.cc
+7
-0
mace/ops/softmax_test.cc
mace/ops/softmax_test.cc
+4
-0
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+2
-0
mace/ops/space_to_batch.h
mace/ops/space_to_batch.h
+2
-0
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+6
-0
mace/ops/space_to_batch_test.cc
mace/ops/space_to_batch_test.cc
+5
-0
mace/ops/winograd_convolution_test.cc
mace/ops/winograd_convolution_test.cc
+7
-7
mace/ops/winograd_inverse_transform.cc
mace/ops/winograd_inverse_transform.cc
+2
-0
mace/ops/winograd_inverse_transform.h
mace/ops/winograd_inverse_transform.h
+2
-0
mace/ops/winograd_transform.cc
mace/ops/winograd_transform.cc
+2
-0
mace/ops/winograd_transform.h
mace/ops/winograd_transform.h
+2
-0
mace/ops/winograd_transform_benchmark.cc
mace/ops/winograd_transform_benchmark.cc
+5
-0
tools/benchmark.sh
tools/benchmark.sh
+7
-1
tools/build_run_throughput_test.sh
tools/build_run_throughput_test.sh
+7
-1
tools/clear_env.sh
tools/clear_env.sh
+7
-1
tools/env.sh
tools/env.sh
+4
-0
tools/generate_production_code.sh
tools/generate_production_code.sh
+7
-1
tools/tuning_run.sh
tools/tuning_run.sh
+7
-1
tools/validate_tools.sh
tools/validate_tools.sh
+7
-1
tools/wino_conv.py
tools/wino_conv.py
+122
-54
未找到文件。
mace/core/operator.cc
浏览文件 @
2944863e
...
...
@@ -62,6 +62,8 @@ std::unique_ptr<OperatorBase> OperatorRegistry::CreateOperator(
}
}
namespace
ops
{
extern
void
Register_Activation
(
OperatorRegistry
*
op_registry
);
extern
void
Register_AddN
(
OperatorRegistry
*
op_registry
);
extern
void
Register_BatchNorm
(
OperatorRegistry
*
op_registry
);
...
...
@@ -88,32 +90,34 @@ extern void Register_Eltwise(OperatorRegistry *op_registry);
extern
void
Register_FullyConnected
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Slice
(
OperatorRegistry
*
op_registry
);
}
// namespace ops
OperatorRegistry
::
OperatorRegistry
()
{
Register_Activation
(
this
);
Register_AddN
(
this
);
Register_BatchNorm
(
this
);
Register_BatchToSpaceND
(
this
);
Register_BiasAdd
(
this
);
Register_BufferToImage
(
this
);
Register_ChannelShuffle
(
this
);
Register_Concat
(
this
);
Register_Conv2D
(
this
);
Register_DepthwiseConv2d
(
this
);
Register_FoldedBatchNorm
(
this
);
Register_FusedConv2D
(
this
);
Register_GlobalAvgPooling
(
this
);
Register_ImageToBuffer
(
this
);
Register_Pooling
(
this
);
Register_ResizeBilinear
(
this
);
Register_Softmax
(
this
);
Register_SpaceToBatchND
(
this
);
Register_MatMul
(
this
);
Register_WinogradTransform
(
this
);
Register_WinogradInverseTransform
(
this
);
Register_Reshape
(
this
);
Register_Eltwise
(
this
);
Register_FullyConnected
(
this
);
Register_Slice
(
this
);
ops
::
Register_Activation
(
this
);
ops
::
Register_AddN
(
this
);
ops
::
Register_BatchNorm
(
this
);
ops
::
Register_BatchToSpaceND
(
this
);
ops
::
Register_BiasAdd
(
this
);
ops
::
Register_BufferToImage
(
this
);
ops
::
Register_ChannelShuffle
(
this
);
ops
::
Register_Concat
(
this
);
ops
::
Register_Conv2D
(
this
);
ops
::
Register_DepthwiseConv2d
(
this
);
ops
::
Register_FoldedBatchNorm
(
this
);
ops
::
Register_FusedConv2D
(
this
);
ops
::
Register_GlobalAvgPooling
(
this
);
ops
::
Register_ImageToBuffer
(
this
);
ops
::
Register_Pooling
(
this
);
ops
::
Register_ResizeBilinear
(
this
);
ops
::
Register_Softmax
(
this
);
ops
::
Register_SpaceToBatchND
(
this
);
ops
::
Register_MatMul
(
this
);
ops
::
Register_WinogradTransform
(
this
);
ops
::
Register_WinogradInverseTransform
(
this
);
ops
::
Register_Reshape
(
this
);
ops
::
Register_Eltwise
(
this
);
ops
::
Register_FullyConnected
(
this
);
ops
::
Register_Slice
(
this
);
}
}
// namespace mace
mace/kernels/activation.h
浏览文件 @
2944863e
...
...
@@ -152,6 +152,7 @@ class ActivationFunctor<DeviceType::OPENCL, T> {
T
relux_max_limit_
;
cl
::
Kernel
kernel_
;
std
::
string
tuning_key_prefix_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/addn.h
浏览文件 @
2944863e
...
...
@@ -91,6 +91,7 @@ struct AddNFunctor<DeviceType::OPENCL, T> {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/batch_norm.h
浏览文件 @
2944863e
...
...
@@ -156,6 +156,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namepsace kernels
...
...
mace/kernels/bias_add.h
浏览文件 @
2944863e
...
...
@@ -62,6 +62,7 @@ struct BiasAddFunctor<DeviceType::OPENCL, T> {
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namepsace kernels
...
...
mace/kernels/channel_shuffle.h
浏览文件 @
2944863e
...
...
@@ -55,6 +55,7 @@ struct ChannelShuffleFunctor<DeviceType::OPENCL, T> {
cl
::
Kernel
kernel_
;
const
int
groups_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/concat.h
浏览文件 @
2944863e
...
...
@@ -83,6 +83,7 @@ struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namepsace kernels
...
...
mace/kernels/conv_2d.h
浏览文件 @
2944863e
...
...
@@ -401,6 +401,7 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/depthwise_conv2d.h
浏览文件 @
2944863e
...
...
@@ -439,6 +439,7 @@ struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/eltwise.h
浏览文件 @
2944863e
...
...
@@ -94,6 +94,7 @@ struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/fully_connected.h
浏览文件 @
2944863e
...
...
@@ -90,6 +90,7 @@ struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
cl
::
Kernel
kernel_
;
std
::
vector
<
uint32_t
>
gws_
;
std
::
vector
<
uint32_t
>
lws_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/opencl/activation_opencl.cc
浏览文件 @
2944863e
...
...
@@ -58,6 +58,9 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
LOG
(
FATAL
)
<<
"Unknown activation type: "
<<
activation_
;
}
kernel_
=
runtime
->
BuildKernel
(
"activation"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
int
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
if
(
activation_
==
PRELU
)
{
...
...
@@ -66,6 +69,8 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
.
setArg
(
idx
++
,
static_cast
<
float
>
(
relux_max_limit_
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
2944863e
...
...
@@ -32,15 +32,6 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
MACE_CHECK
(
channels
==
input_tensors
[
i
]
->
dim
(
3
));
}
std
::
vector
<
index_t
>
output_shape
=
input_tensors
[
0
]
->
shape
();
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
width_pixels
=
channel_blocks
*
width
;
const
index_t
batch_height_pixels
=
batch
*
height
;
if
(
kernel_
.
get
()
==
nullptr
)
{
if
(
input_tensors
.
size
()
>
4
)
{
MACE_NOT_IMPLEMENTED
;
...
...
@@ -55,11 +46,26 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
built_options
.
emplace
(
MakeString
(
"-DINPUT_NUM="
,
input_tensors
.
size
()));
kernel_
=
runtime
->
BuildKernel
(
"addn"
,
kernel_name
,
built_options
);
}
std
::
vector
<
index_t
>
output_shape
=
input_tensors
[
0
]
->
shape
();
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
width_pixels
=
channel_blocks
*
width
;
const
index_t
batch_height_pixels
=
batch
*
height
;
if
(
!
IsVecEqual
(
input_shape_
,
input_tensors
[
0
]
->
shape
()))
{
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
uint32_t
idx
=
0
;
for
(
auto
input
:
input_tensors
)
{
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
}
kernel_
.
setArg
(
idx
++
,
*
(
output_tensor
->
opencl_image
()));
input_shape_
=
input_tensors
[
0
]
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
2944863e
...
...
@@ -61,7 +61,8 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
=
runtime
->
BuildKernel
(
"batch_norm"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
scale
->
opencl_image
()));
...
...
@@ -73,6 +74,8 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/bias_add_opencl.cc
浏览文件 @
2944863e
...
...
@@ -33,10 +33,13 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"bias_add"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/channel_shuffle.cc
浏览文件 @
2944863e
...
...
@@ -13,9 +13,10 @@ namespace mace {
namespace
kernels
{
template
<
typename
T
>
void
ChannelShuffleFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
void
ChannelShuffleFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
output
->
ResizeLike
(
input
);
const
index_t
batch
=
input
->
dim
(
0
);
...
...
@@ -39,12 +40,15 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *inpu
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"channel_shuffle"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
groups_
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
channels_per_group
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
group_channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
2944863e
...
...
@@ -15,6 +15,7 @@ static void Concat2(cl::Kernel *kernel,
const
Tensor
*
input0
,
const
Tensor
*
input1
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -41,6 +42,8 @@ static void Concat2(cl::Kernel *kernel,
}
*
kernel
=
runtime
->
BuildKernel
(
"concat"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input0
->
opencl_image
())));
...
...
@@ -49,6 +52,7 @@ static void Concat2(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input0
->
dim
(
3
)));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
opencl_image
())));
*
prev_input_shape
=
input0
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
...
...
@@ -142,7 +146,7 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
switch
(
inputs_count
)
{
case
2
:
Concat2
(
&
kernel_
,
input_list
[
0
],
input_list
[
1
],
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
&
input_shape_
,
output
,
future
);
break
;
default:
if
(
divisible_four
)
{
...
...
mace/kernels/opencl/conv_2d_opencl.cc
浏览文件 @
2944863e
...
...
@@ -18,6 +18,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
...
...
@@ -31,6 +32,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
...
...
@@ -44,6 +46,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
...
...
@@ -57,8 +60,8 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
cl
::
Kernel
*
kernel
,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
Tensor
*
output
,
StatsFuture
*
future
);
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
// Selection matrix: kernel_size x stride_size
static
const
Conv2dOpenclFunction
selector
[
5
]
=
{
Conv2dOpenclK1x1
,
nullptr
,
Conv2dOpenclK3x3
,
nullptr
,
nullptr
};
...
...
@@ -97,11 +100,11 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
auto
conv2d_func
=
selector
[
kernel_h
-
1
];
conv2d_func
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
future
);
}
else
{
Conv2dOpencl
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
future
);
}
}
...
...
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
2944863e
...
...
@@ -20,6 +20,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -68,6 +69,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_1x1"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
...
...
@@ -83,6 +86,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel
->
setArg
(
idx
++
,
stride
);
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
2944863e
...
...
@@ -22,6 +22,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -62,7 +63,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_3x3"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
...
...
@@ -81,6 +83,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
kernel
->
setArg
(
idx
++
,
dilations
[
1
]);
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/conv_2d_opencl_general.cc
浏览文件 @
2944863e
...
...
@@ -22,6 +22,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -62,7 +63,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
...
...
@@ -83,6 +85,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
kernel
->
setArg
(
idx
++
,
dilations
[
1
]);
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/depthwise_conv_opencl.cc
浏览文件 @
2944863e
...
...
@@ -21,6 +21,7 @@ void DepthwiseConv2d(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -35,17 +36,6 @@ void DepthwiseConv2d(cl::Kernel *kernel,
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
if
(
kernel
->
get
()
==
nullptr
)
{
const
index_t
input_batch
=
input
->
dim
(
0
);
const
index_t
input_height
=
input
->
dim
(
1
);
const
index_t
input_width
=
input
->
dim
(
2
);
const
index_t
filter_height
=
filter
->
dim
(
0
);
const
index_t
filter_width
=
filter
->
dim
(
1
);
MACE_CHECK
(
multiplier
==
1
,
"Multiplier > 1 not supported"
);
MACE_CHECK
(
multiplier
*
input_channels
==
channels
);
MACE_CHECK
(
filter
->
dim
(
2
)
==
input_channels
,
filter
->
dim
(
2
),
"!="
,
input_channels
);
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"depthwise_conv2d"
);
...
...
@@ -80,6 +70,18 @@ void DepthwiseConv2d(cl::Kernel *kernel,
*
kernel
=
runtime
->
BuildKernel
(
"depthwise_conv2d"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
const
index_t
input_batch
=
input
->
dim
(
0
);
const
index_t
input_height
=
input
->
dim
(
1
);
const
index_t
input_width
=
input
->
dim
(
2
);
const
index_t
filter_height
=
filter
->
dim
(
0
);
const
index_t
filter_width
=
filter
->
dim
(
1
);
MACE_CHECK
(
multiplier
==
1
,
"Multiplier > 1 not supported"
);
MACE_CHECK
(
multiplier
*
input_channels
==
channels
);
MACE_CHECK
(
filter
->
dim
(
2
)
==
input_channels
,
filter
->
dim
(
2
),
"!="
,
input_channels
);
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
...
...
@@ -102,6 +104,7 @@ void DepthwiseConv2d(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
short
>
(
dilations
[
0
]));
kernel
->
setArg
(
idx
++
,
static_cast
<
short
>
(
dilations
[
1
]));
}
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
@@ -120,9 +123,7 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
typedef
void
(
*
Conv2dOpenclFunction
)(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
);
index_t
kernel_h
=
filter
->
dim
(
2
);
index_t
kernel_w
=
filter
->
dim
(
3
);
if
(
strides_
[
0
]
!=
strides_
[
1
])
{
...
...
@@ -163,7 +164,7 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
DepthwiseConv2d
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
future
);
}
template
struct
DepthwiseConv2dFunctor
<
DeviceType
::
OPENCL
,
float
>;
...
...
mace/kernels/opencl/eltwise_opencl.cc
浏览文件 @
2944863e
...
...
@@ -36,6 +36,8 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
if
(
!
coeff_
.
empty
())
built_options
.
emplace
(
"-DCOEFF_SUM"
);
kernel_
=
runtime
->
BuildKernel
(
"eltwise"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input0
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input1
->
opencl_image
()));
...
...
@@ -44,6 +46,7 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
kernel_
.
setArg
(
idx
++
,
coeff_
[
1
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input0
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
...
...
mace/kernels/opencl/fully_connected_opencl.cc
浏览文件 @
2944863e
...
...
@@ -13,6 +13,7 @@ void FCWXKernel(cl::Kernel *kernel,
const
Tensor
*
input
,
const
Tensor
*
weight
,
const
Tensor
*
bias
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
const
ActivationType
activation
,
std
::
vector
<
uint32_t
>
&
gws
,
...
...
@@ -67,6 +68,11 @@ void FCWXKernel(cl::Kernel *kernel,
const
uint32_t
inter_local_blks
=
kwg_size
/
(
gws
[
0
]
*
gws
[
1
]);
lws
=
{
gws
[
0
],
gws
[
1
],
inter_local_blks
};
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output
->
dim
(
3
));
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
weight
->
opencl_image
()));
...
...
@@ -80,6 +86,10 @@ void FCWXKernel(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
input
->
dim
(
3
))));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
output_blocks
));
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
gws
[
2
]
=
static_cast
<
uint32_t
>
(
batch
*
output_blocks
);
*
prev_input_shape
=
input
->
shape
();
}
cl
::
Event
event
;
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
...
...
@@ -103,6 +113,7 @@ void FCWTXKernel(cl::Kernel *kernel,
const
Tensor
*
input
,
const
Tensor
*
weight
,
const
Tensor
*
bias
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
const
ActivationType
activation
,
std
::
vector
<
uint32_t
>
&
gws
,
...
...
@@ -141,6 +152,9 @@ void FCWTXKernel(cl::Kernel *kernel,
*
kernel
=
runtime
->
BuildKernel
(
"fully_connected"
,
kernel_name
,
built_options
);
lws
=
{
16
,
64
,
1
};
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
weight
->
opencl_image
()));
...
...
@@ -155,14 +169,13 @@ void FCWTXKernel(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
output_size
=
output
->
dim
(
3
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output_size
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output
->
dim
(
3
));
gws
=
{
static_cast
<
uint32_t
>
(
batch
),
static_cast
<
uint32_t
>
(
output_blocks
),
};
lws
=
{
16
,
64
,
1
};
*
prev_input_shape
=
input
->
shape
();
}
std
::
stringstream
ss
;
...
...
@@ -185,11 +198,11 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
if
(
weight_type_
==
BufferType
::
WEIGHT_HEIGHT
)
{
FCWTXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
output
,
FCWTXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
}
else
{
FCWXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
FCWXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
}
};
...
...
mace/kernels/opencl/helper.h
浏览文件 @
2944863e
...
...
@@ -71,6 +71,13 @@ inline bool LimitKernelTime() {
return
flag
!=
nullptr
&&
strlen
(
flag
)
==
1
&&
flag
[
0
]
==
'1'
;
}
template
<
typename
T
>
bool
IsVecEqual
(
const
std
::
vector
<
T
>
&
input0
,
const
std
::
vector
<
T
>
&
input1
)
{
return
((
input0
.
size
()
==
input1
.
size
())
&&
(
std
::
equal
(
input0
.
begin
(),
input0
.
end
(),
input1
.
begin
())));
}
namespace
{
template
<
typename
T
>
void
AppendToStream
(
std
::
stringstream
*
ss
,
const
std
::
string
&
delimiter
,
T
v
)
{
...
...
mace/kernels/opencl/matmul.cc
浏览文件 @
2944863e
...
...
@@ -36,17 +36,16 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"matmul"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
A
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
B
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
C
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
A
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
A
->
dim
(
2
))));
}
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
A
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
B
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
C
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
A
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
A
->
dim
(
2
))));
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_blocks
),
...
...
mace/kernels/opencl/pooling_opencl.cc
浏览文件 @
2944863e
...
...
@@ -17,31 +17,6 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
StatsFuture
*
future
)
{
MACE_CHECK
(
dilations_
[
0
]
==
1
&&
dilations_
[
1
]
==
1
)
<<
"Pooling opencl kernel not support dilation yet"
;
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
),
input
->
dim
(
3
)};
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
kernels
::
CalcNHWCPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
,
strides_
,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
}
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
index_t
batch
=
output
->
dim
(
0
);
index_t
out_height
=
output
->
dim
(
1
);
index_t
out_width
=
output
->
dim
(
2
);
index_t
channels
=
output
->
dim
(
3
);
index_t
channel_blocks
=
(
channels
+
3
)
/
4
;
if
(
kernel_
.
get
()
==
nullptr
)
{
const
DataType
dt
=
DataTypeToEnum
<
T
>::
value
;
...
...
@@ -62,18 +37,49 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
=
runtime
->
BuildKernel
(
"pooling"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
),
input
->
dim
(
3
)};
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
kernels
::
CalcNHWCPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
,
strides_
,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
}
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out
_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out
put
->
dim
(
1
)
));
kernel_
.
setArg
(
idx
++
,
paddings
[
0
]
/
2
);
kernel_
.
setArg
(
idx
++
,
paddings
[
1
]
/
2
);
kernel_
.
setArg
(
idx
++
,
strides_
[
0
]);
kernel_
.
setArg
(
idx
++
,
kernels_
[
0
]);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
index_t
batch
=
output
->
dim
(
0
);
index_t
out_height
=
output
->
dim
(
1
);
index_t
out_width
=
output
->
dim
(
2
);
index_t
channels
=
output
->
dim
(
3
);
index_t
channel_blocks
=
(
channels
+
3
)
/
4
;
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
batch
*
out_height
),
...
...
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
2944863e
...
...
@@ -25,6 +25,18 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
const
index_t
out_width
=
out_width_
;
if
(
kernel_
.
get
()
==
nullptr
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"resize_bilinear_nocache"
);
built_options
.
emplace
(
"-Dresize_bilinear_nocache="
+
kernel_name
);
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
std
::
vector
<
index_t
>
output_shape
{
batch
,
out_height
,
out_width
,
channels
};
...
...
@@ -38,16 +50,6 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"resize_bilinear_nocache"
);
built_options
.
emplace
(
"-Dresize_bilinear_nocache="
+
kernel_name
);
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
...
...
@@ -56,6 +58,9 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out_height
));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/softmax_opencl.cc
浏览文件 @
2944863e
...
...
@@ -34,11 +34,14 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"softmax"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
logits
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
logits
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
channels
));
kernel_
.
setArg
(
idx
++
,
remain_channels
);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
logits
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
...
...
mace/kernels/opencl/space_to_batch_opencl.cc
浏览文件 @
2944863e
...
...
@@ -43,6 +43,8 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
=
runtime
->
BuildKernel
(
"space_to_batch"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
space_shape_
,
space_tensor
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
b2s_
)
{
kernel_
.
setArg
(
idx
++
,
*
(
batch_tensor
->
opencl_image
()));
...
...
@@ -59,6 +61,8 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
space_tensor
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch_tensor
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch_tensor
->
dim
(
2
)));
space_shape_
=
space_tensor
->
shape
();
}
const
uint32_t
chan_blk
=
RoundUpDiv4
<
uint32_t
>
(
batch_tensor
->
dim
(
3
));
...
...
mace/kernels/opencl/winograd_transform.cc
浏览文件 @
2944863e
...
...
@@ -14,6 +14,21 @@ namespace kernels {
template
<
typename
T
>
void
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_transform_2x2"
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-Dwinograd_transform_2x2="
+
obfuscated_kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
}
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
3
,
3
,
input_tensor
->
dim
(
3
),
1
};
std
::
vector
<
int
>
paddings
(
2
);
...
...
@@ -27,29 +42,16 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
paddings_
.
data
(),
dilations_
.
data
(),
strides_
.
data
(),
RoundType
::
FLOOR
,
output_shape
.
data
());
}
const
index_t
round_h
=
(
output_shape
[
1
]
+
1
)
/
2
;
const
index_t
round_w
=
(
output_shape
[
2
]
+
1
)
/
2
;
const
index_t
out_width
=
input_tensor
->
dim
(
0
)
*
round_h
*
round_w
;
if
(
kernel_
.
get
()
==
nullptr
)
{
if
(
!
IsVecEqual
(
input_shape_
,
input_tensor
->
shape
())
)
{
output_shape
=
{
16
,
input_tensor
->
dim
(
3
),
out_width
,
1
};
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_HEIGHT
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_transform_2x2"
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-Dwinograd_transform_2x2="
+
obfuscated_kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input_tensor
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output_tensor
->
opencl_image
()));
...
...
@@ -60,6 +62,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
0
]
/
2
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
1
]
/
2
));
input_shape_
=
input_tensor
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
...
...
@@ -79,11 +83,6 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
const
Tensor
*
bias
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
input_tensor
->
dim
(
1
)};
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
string
obfuscated_kernel_name
=
...
...
@@ -121,6 +120,13 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input_tensor
->
shape
()))
{
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
input_tensor
->
dim
(
1
)};
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
const
uint32_t
round_h
=
(
height_
+
1
)
/
2
;
const
uint32_t
round_w
=
(
width_
+
1
)
/
2
;
...
...
@@ -139,6 +145,8 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_h
*
round_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
input_shape_
=
input_tensor
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
...
...
mace/kernels/pooling.h
浏览文件 @
2944863e
...
...
@@ -182,6 +182,7 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
2944863e
...
...
@@ -172,6 +172,7 @@ struct ResizeBilinearFunctor<DeviceType::OPENCL, T>
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/softmax.h
浏览文件 @
2944863e
...
...
@@ -57,6 +57,7 @@ struct SoftmaxFunctor<DeviceType::OPENCL, T> {
void
operator
()(
const
Tensor
*
logits
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namepsace kernels
...
...
mace/kernels/space_to_batch.h
浏览文件 @
2944863e
...
...
@@ -54,6 +54,7 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
space_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/winograd_transform.h
浏览文件 @
2944863e
...
...
@@ -49,6 +49,7 @@ struct WinogradTransformFunctor<DeviceType::OPENCL, T>
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
struct
WinogradInverseTransformFunctorBase
{
...
...
@@ -105,6 +106,7 @@ struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T>
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/ops/activation.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/activation.h"
namespace
mace
{
namespace
ops
{
void
Register_Activation
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Activation"
)
...
...
@@ -26,4 +27,5 @@ void Register_Activation(OperatorRegistry *op_registry) {
ActivationOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/activation.h
浏览文件 @
2944863e
...
...
@@ -5,10 +5,13 @@
#ifndef MACE_OPS_ACTIVATION_H_
#define MACE_OPS_ACTIVATION_H_
#include <string>
#include "mace/core/operator.h"
#include "mace/kernels/activation.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
ActivationOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -36,6 +39,7 @@ class ActivationOp : public Operator<D, T> {
kernels
::
ActivationFunctor
<
D
,
T
>
functor_
;
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_ACTIVATION_H_
mace/ops/activation_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -3,11 +3,15 @@
//
#include <string>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
ReluBenchmark
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
...
...
@@ -316,4 +320,6 @@ BM_SIGMOID(1, 3, 512, 512);
BM_SIGMOID
(
1
,
32
,
112
,
112
);
BM_SIGMOID
(
1
,
64
,
256
,
256
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/activation_test.cc
浏览文件 @
2944863e
...
...
@@ -6,6 +6,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
ActivationOpTest
:
public
OpsTestBase
{};
...
...
@@ -365,4 +367,6 @@ TEST_F(ActivationOpTest, OPENCLSimpleSigmoid) {
TestSimpleSigmoid
<
DeviceType
::
OPENCL
>
();
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/addn.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/addn.h"
namespace
mace
{
namespace
ops
{
void
Register_AddN
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
...
...
@@ -26,4 +27,5 @@ void Register_AddN(OperatorRegistry *op_registry) {
AddNOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/addn.h
浏览文件 @
2944863e
...
...
@@ -11,6 +11,7 @@
#include "mace/kernels/addn.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
AddNOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -40,6 +41,7 @@ class AddNOp : public Operator<D, T> {
kernels
::
AddNFunctor
<
D
,
T
>
functor_
;
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_ADDN_H_
mace/ops/addn_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -3,11 +3,15 @@
//
#include <string>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
AddNBenchmark
(
int
iters
,
int
inputs
,
int
n
,
int
h
,
int
w
,
int
c
)
{
mace
::
testing
::
StopTiming
();
...
...
@@ -75,4 +79,6 @@ BM_ADDN(4, 1, 128, 128, 3);
BM_ADDN
(
2
,
1
,
256
,
256
,
3
);
BM_ADDN
(
2
,
1
,
512
,
512
,
3
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/addn_test.cc
浏览文件 @
2944863e
...
...
@@ -6,6 +6,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
AddnOpTest
:
public
OpsTestBase
{};
...
...
@@ -62,15 +64,15 @@ TEST_F(AddnOpTest, CPUSimpleAdd3) { SimpleAdd3<DeviceType::CPU>(); }
template
<
DeviceType
D
>
void
RandomTest
()
{
testing
::
internal
::
LogToStderr
();
s
rand
(
time
(
NULL
)
);
s
tatic
unsigned
int
seed
=
time
(
NULL
);
for
(
int
round
=
0
;
round
<
10
;
++
round
)
{
// generate random input
index_t
n
=
1
+
(
rand
(
)
%
5
);
index_t
h
=
1
+
(
rand
(
)
%
100
);
index_t
w
=
1
+
(
rand
(
)
%
100
);
index_t
c
=
1
+
(
rand
(
)
%
32
);
int
input_num
=
2
+
rand
(
)
%
3
;
index_t
n
=
1
+
(
rand
_r
(
&
seed
)
%
5
);
index_t
h
=
1
+
(
rand
_r
(
&
seed
)
%
100
);
index_t
w
=
1
+
(
rand
_r
(
&
seed
)
%
100
);
index_t
c
=
1
+
(
rand
_r
(
&
seed
)
%
32
);
int
input_num
=
2
+
rand
_r
(
&
seed
)
%
3
;
// Construct graph
OpsTestNet
net
;
auto
op_def
=
OpDefBuilder
(
"AddN"
,
"AddNTest"
);
...
...
@@ -117,4 +119,6 @@ void RandomTest() {
TEST_F
(
AddnOpTest
,
OPENCLRandom
)
{
RandomTest
<
DeviceType
::
OPENCL
>
();
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/batch_norm.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/batch_norm.h"
namespace
mace
{
namespace
ops
{
void
Register_BatchNorm
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
...
...
@@ -26,4 +27,5 @@ void Register_BatchNorm(OperatorRegistry *op_registry) {
BatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/batch_norm.h
浏览文件 @
2944863e
...
...
@@ -10,6 +10,7 @@
#include "mace/kernels/batch_norm.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
BatchNormOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -55,6 +56,7 @@ class BatchNormOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_BATCH_NORM_H_
mace/ops/batch_norm_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -8,6 +8,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
BatchNorm
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
...
...
@@ -101,4 +104,6 @@ BM_BATCH_NORM(1, 1024, 7, 7);
BM_BATCH_NORM
(
32
,
1
,
256
,
256
);
BM_BATCH_NORM
(
32
,
3
,
256
,
256
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/batch_norm_test.cc
浏览文件 @
2944863e
...
...
@@ -6,6 +6,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
BatchNormOpTest
:
public
OpsTestBase
{};
...
...
@@ -75,11 +77,10 @@ TEST_F(BatchNormOpTest, SimpleCPU) { Simple<DeviceType::CPU>(); }
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
...
...
@@ -147,11 +148,10 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
}
TEST_F
(
BatchNormOpTest
,
SimpleRandomHalfOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
...
...
@@ -220,11 +220,10 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
...
...
@@ -292,11 +291,10 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomHalfOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
...
...
@@ -363,4 +361,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/batch_to_space.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/batch_to_space.h"
namespace
mace
{
namespace
ops
{
void
Register_BatchToSpaceND
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
...
...
@@ -19,4 +20,5 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/batch_to_space.h
浏览文件 @
2944863e
...
...
@@ -2,15 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_
SPACE_TO_BATCH
_H_
#define MACE_OPS_
SPACE_TO_BATCH
_H_
#ifndef MACE_OPS_
BATCH_TO_SPACE
_H_
#define MACE_OPS_
BATCH_TO_SPACE
_H_
#include <memory>
#include <vector>
#include "mace/core/operator.h"
#include "mace/kernels/space_to_batch.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
BatchToSpaceNDOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -68,6 +70,7 @@ class BatchToSpaceNDOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_
SPACE_TO_BATCH
_H_
#endif // MACE_OPS_
BATCH_TO_SPACE
_H_
mace/ops/batch_to_space_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -7,6 +7,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
BMBatchToSpace
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
arg
)
{
...
...
@@ -53,4 +56,7 @@ static void BMBatchToSpace(
BM_BATCH_TO_SPACE
(
128
,
8
,
8
,
128
,
2
);
BM_BATCH_TO_SPACE
(
4
,
128
,
128
,
32
,
2
);
BM_BATCH_TO_SPACE
(
16
,
64
,
64
,
32
,
4
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/bias_add.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/bias_add.h"
namespace
mace
{
namespace
ops
{
void
Register_BiasAdd
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BiasAdd"
)
...
...
@@ -26,4 +27,5 @@ void Register_BiasAdd(OperatorRegistry *op_registry) {
BiasAddOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/bias_add.h
浏览文件 @
2944863e
...
...
@@ -2,13 +2,14 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_BIAS_ADD_H_
#define MACE_BIAS_ADD_H_
#ifndef MACE_
OPS_
BIAS_ADD_H_
#define MACE_
OPS_
BIAS_ADD_H_
#include "mace/core/operator.h"
#include "mace/kernels/bias_add.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
BiasAddOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -40,6 +41,7 @@ class BiasAddOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_BIAS_ADD_H_
#endif // MACE_
OPS_
BIAS_ADD_H_
mace/ops/bias_add_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -8,6 +8,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
BiasAdd
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
mace
::
testing
::
StopTiming
();
...
...
@@ -77,4 +80,7 @@ BM_BIAS_ADD(1, 512, 14, 14);
BM_BIAS_ADD
(
1
,
1024
,
7
,
7
);
BM_BIAS_ADD
(
32
,
1
,
256
,
256
);
BM_BIAS_ADD
(
32
,
3
,
256
,
256
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/bias_add_test.cc
浏览文件 @
2944863e
...
...
@@ -6,6 +6,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
BiasAddOpTest
:
public
OpsTestBase
{};
...
...
@@ -60,13 +62,12 @@ TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) {
}
TEST_F
(
BiasAddOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
+
rand
()
%
50
;
index_t
width
=
64
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
64
+
rand_r
(
&
seed
)
%
50
;
index_t
width
=
64
+
rand_r
(
&
seed
)
%
50
;
// Construct graph
OpsTestNet
net
;
...
...
@@ -110,13 +111,12 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
}
TEST_F
(
BiasAddOpTest
,
ComplexRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
103
+
rand
()
%
100
;
index_t
width
=
113
+
rand
()
%
100
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
103
+
rand_r
(
&
seed
)
%
100
;
index_t
width
=
113
+
rand_r
(
&
seed
)
%
100
;
// Construct graph
OpsTestNet
net
;
...
...
@@ -158,4 +158,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/buffer_to_image.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/buffer_to_image.h"
namespace
mace
{
namespace
ops
{
void
Register_BufferToImage
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BufferToImage"
)
...
...
@@ -20,4 +21,5 @@ void Register_BufferToImage(OperatorRegistry *op_registry) {
BufferToImageOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/buffer_to_image.h
浏览文件 @
2944863e
...
...
@@ -9,6 +9,7 @@
#include "mace/kernels/buffer_to_image.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
BufferToImageOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -36,5 +37,6 @@ class BufferToImageOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_BUFFER_TO_IMAGE_H_
mace/ops/buffer_to_image_test.cc
浏览文件 @
2944863e
...
...
@@ -5,7 +5,9 @@
#include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
void
TestBidirectionTransform
(
const
int
type
,
...
...
@@ -188,3 +190,7 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) {
TestStringHalfBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
2
},
input_data
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/channel_shuffle.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/channel_shuffle.h"
namespace
mace
{
namespace
ops
{
void
Register_ChannelShuffle
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ChannelShuffle"
)
...
...
@@ -24,4 +25,5 @@ void Register_ChannelShuffle(OperatorRegistry *op_registry) {
ChannelShuffleOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/channel_shuffle.h
浏览文件 @
2944863e
...
...
@@ -11,6 +11,7 @@
#include "mace/kernels/channel_shuffle.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
ChannelShuffleOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -42,6 +43,7 @@ class ChannelShuffleOp : public Operator<D, T> {
kernels
::
ChannelShuffleFunctor
<
D
,
T
>
functor_
;
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_CHANNEL_SHUFFLE_H_
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -7,10 +7,12 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
ChannelShuffle
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
group
)
{
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
group
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -23,15 +25,15 @@ static void ChannelShuffle(
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"ChannelShuffle"
,
"ChannelShuffleTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"group"
,
group
)
.
Finalize
(
net
.
NewOperatorDef
());
.
Input
(
"InputImage"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"group"
,
group
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"Softmax"
,
"SoftmaxBM"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
}
// Warm-up
...
...
@@ -47,18 +49,19 @@ static void ChannelShuffle(
net
.
Sync
();
}
#define BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, TYPE, DEVICE) \
static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ChannelShuffle<DEVICE, TYPE>(iters, N, C, H, W, G); \
} \
#define BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, TYPE, DEVICE) \
static void \
BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ChannelShuffle<DEVICE, TYPE>(iters, N, C, H, W, G); \
} \
BENCHMARK(BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE)
#define BM_CHANNEL_SHUFFLE(N, C, H, W, G) \
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \
#define BM_CHANNEL_SHUFFLE(N, C, H, W, G)
\
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU);
\
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, OPENCL); \
BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, OPENCL);
...
...
@@ -66,4 +69,6 @@ BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8);
BM_CHANNEL_SHUFFLE
(
1
,
64
,
128
,
128
,
8
);
BM_CHANNEL_SHUFFLE
(
1
,
64
,
256
,
256
,
8
);
}
// namespace mace
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/channel_shuffle_test.cc
浏览文件 @
2944863e
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
namespace
mace
{
namespace
ops
{
namespace
test
{
class
ChannelShuffleOpTest
:
public
OpsTestBase
{};
...
...
@@ -38,30 +41,34 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) {
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
1
,
1
,
2
,
16
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
"Input"
,
{
1
,
1
,
2
,
16
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"ChannelShuffle"
,
"ChannelShuffleTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"group"
,
4
)
.
Finalize
(
net
.
NewOperatorDef
());
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"group"
,
4
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
DeviceType
::
OPENCL
);
// Transfer output
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
&
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
// Check
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
1
,
2
,
16
},
{
0
,
4
,
8
,
12
,
1
,
5
,
9
,
13
,
2
,
6
,
10
,
14
,
3
,
7
,
11
,
15
,
16
,
20
,
24
,
28
,
17
,
21
,
25
,
29
,
18
,
22
,
26
,
30
,
19
,
23
,
27
,
31
});
{
1
,
1
,
2
,
16
},
{
0
,
4
,
8
,
12
,
1
,
5
,
9
,
13
,
2
,
6
,
10
,
14
,
3
,
7
,
11
,
15
,
16
,
20
,
24
,
28
,
17
,
21
,
25
,
29
,
18
,
22
,
26
,
30
,
19
,
23
,
27
,
31
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/concat.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/concat.h"
namespace
mace
{
namespace
ops
{
void
Register_Concat
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
...
...
@@ -25,4 +26,5 @@ void Register_Concat(OperatorRegistry *op_registry) {
ConcatOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/concat.h
浏览文件 @
2944863e
...
...
@@ -5,9 +5,13 @@
#ifndef MACE_OPS_CONCAT_H_
#define MACE_OPS_CONCAT_H_
#include <vector>
#include "mace/core/operator.h"
#include "mace/kernels/concat.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
ConcatOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -41,6 +45,7 @@ class ConcatOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_CONCAT_H_
mace/ops/concat_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -7,6 +7,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
ConcatHelper
(
int
iters
,
int
concat_dim
,
int
dim1
)
{
mace
::
testing
::
StopTiming
();
...
...
@@ -106,4 +109,6 @@ BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half);
BM_CONCAT_OPENCL_MACRO
(
3
,
32
,
32
,
128
,
half
);
BM_CONCAT_OPENCL_MACRO
(
3
,
32
,
32
,
256
,
half
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/concat_test.cc
浏览文件 @
2944863e
...
...
@@ -2,11 +2,16 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/concat.h"
#include <string>
#include <functional>
#include "gmock/gmock.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/concat.h"
using
namespace
mace
;
namespace
mace
{
namespace
ops
{
namespace
test
{
class
ConcatOpTest
:
public
OpsTestBase
{};
...
...
@@ -87,10 +92,10 @@ TEST_F(ConcatOpTest, CPUSimpleVertical) {
}
TEST_F
(
ConcatOpTest
,
CPURandom
)
{
s
rand
(
time
(
nullptr
)
);
s
tatic
unsigned
int
seed
=
time
(
NULL
);
int
dim
=
5
;
int
num_inputs
=
2
+
rand
(
)
%
10
;
int
axis
=
rand
(
)
%
dim
;
int
num_inputs
=
2
+
rand
_r
(
&
seed
)
%
10
;
int
axis
=
rand
_r
(
&
seed
)
%
dim
;
// Construct graph
OpsTestNet
net
;
auto
builder
=
OpDefBuilder
(
"Concat"
,
"ConcatTest"
);
...
...
@@ -108,7 +113,7 @@ TEST_F(ConcatOpTest, CPURandom) {
std
::
vector
<
float
*>
input_ptrs
(
num_inputs
,
nullptr
);
index_t
concat_axis_size
=
0
;
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
input_shapes
[
i
][
axis
]
=
1
+
rand
(
)
%
dim
;
input_shapes
[
i
][
axis
]
=
1
+
rand
_r
(
&
seed
)
%
dim
;
concat_axis_size
+=
input_shapes
[
i
][
axis
];
GenerateRandomRealTypeData
(
input_shapes
[
i
],
&
inputs
[
i
]);
input_ptrs
[
i
]
=
inputs
[
i
].
data
();
...
...
@@ -217,3 +222,7 @@ TEST_F(ConcatOpTest, OPENCLAlignedMultiInput) {
OpenclRandomTest
<
float
>
(
{{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
}},
3
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/conv_2d.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/conv_2d.h"
namespace
mace
{
namespace
ops
{
void
Register_Conv2D
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
...
...
@@ -26,4 +27,5 @@ void Register_Conv2D(OperatorRegistry *op_registry) {
Conv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/conv_2d.h
浏览文件 @
2944863e
...
...
@@ -12,6 +12,7 @@
#include "mace/ops/conv_pool_2d_base.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
Conv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
...
...
@@ -44,6 +45,7 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_CONV_2D_H_
mace/ops/conv_2d_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -10,6 +10,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
Conv2d
(
int
iters
,
...
...
@@ -80,30 +82,32 @@ static void Conv2d(int iters,
// approximate the amortized latency. The OpenCL runtime for Mali/Adreno is
// in-order.
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, DILATION, P, OC, TYPE, \
DEVICE) \
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * OC * oh * ow * (KH * KW * C + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, DILATION, \
mace::Padding::P, OC); \
} \
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, DILATION, P, OC, TYPE, \
DEVICE) \
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION\
##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * OC * oh * ow * (KH * KW * C + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, DILATION, \
mace::Padding::P, OC); \
} \
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION\
##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
...
...
@@ -139,4 +143,6 @@ BM_CONV_2D(1, 32, 256, 256, 3, 3, 1, 4, VALID, 32);
BM_CONV_2D
(
1
,
128
,
56
,
56
,
1
,
1
,
1
,
1
,
SAME
,
128
);
BM_CONV_2D
(
1
,
1024
,
7
,
7
,
1
,
1
,
1
,
1
,
SAME
,
1024
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/conv_2d_test.cc
浏览文件 @
2944863e
...
...
@@ -2,11 +2,15 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/conv_2d.h"
#include <fstream>
#include <vector>
#include "mace/ops/conv_2d.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
namespace
mace
{
namespace
ops
{
namespace
test
{
class
Conv2dOpTest
:
public
OpsTestBase
{};
...
...
@@ -347,14 +351,13 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape,
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
3
+
(
rand
()
%
10
);
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
3
+
(
rand_r
(
&
seed
)
%
10
);
index_t
height
=
shape
[
0
];
index_t
width
=
shape
[
1
];
index_t
input_channels
=
shape
[
2
]
+
(
rand
(
)
%
10
);
index_t
output_channels
=
shape
[
3
]
+
(
rand
(
)
%
10
);
index_t
input_channels
=
shape
[
2
]
+
(
rand
_r
(
&
seed
)
%
10
);
index_t
output_channels
=
shape
[
3
]
+
(
rand
_r
(
&
seed
)
%
10
);
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -729,3 +732,7 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedPad4
)
{
TestArbitraryPadConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
},
{
4
,
4
});
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/conv_pool_2d_base.h
浏览文件 @
2944863e
...
...
@@ -5,10 +5,13 @@
#ifndef MACE_OPS_CONV_POOL_2D_BASE_H_
#define MACE_OPS_CONV_POOL_2D_BASE_H_
#include <vector>
#include "mace/core/operator.h"
#include "mace/kernels/conv_pool_2d_util.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
ConvPool2dOpBase
:
public
Operator
<
D
,
T
>
{
...
...
@@ -29,6 +32,7 @@ class ConvPool2dOpBase : public Operator<D, T> {
std
::
vector
<
int
>
dilations_
;
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_CONV_POOL_2D_BASE_H_
mace/ops/core_test.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
TEST
(
CoreTest
,
INIT_MODE
)
{
std
::
vector
<
OperatorDef
>
op_defs
;
...
...
@@ -56,4 +58,6 @@ TEST(CoreTest, INIT_MODE) {
1e-5
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/depthwise_conv2d.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/depthwise_conv2d.h"
namespace
mace
{
namespace
ops
{
void
Register_DepthwiseConv2d
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"DepthwiseConv2d"
)
...
...
@@ -26,4 +27,5 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) {
DepthwiseConv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/depthwise_conv2d.h
浏览文件 @
2944863e
...
...
@@ -6,6 +6,7 @@
#define MACE_OPS_DEPTHWISE_CONV2D_H_
#include <memory>
#include <string>
#include "mace/core/operator.h"
#include "mace/kernels/conv_2d.h"
...
...
@@ -13,6 +14,7 @@
#include "mace/ops/conv_pool_2d_base.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
DepthwiseConv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
...
...
@@ -48,6 +50,7 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_DEPTHWISE_CONV2D_H_
mace/ops/depthwise_conv2d_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -10,6 +10,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
DepthwiseConv2d
(
int
iters
,
...
...
@@ -75,31 +77,33 @@ static void DepthwiseConv2d(int iters,
}
}
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \
DEVICE) \
static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t dilation = 1; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * C * M * oh * ow * (KH * KW + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, M); \
} \
BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \
DEVICE) \
static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_\
##P##_##M##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t dilation = 1; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t pad_h = 0, pad_w = 0; \
if (P == SAME) { \
pad_h = KH / 2; \
pad_w = KW / 2; \
} \
int64_t oh = \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
int64_t ow = \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * C * M * oh * ow * (KH * KW + 1); \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, M); \
} \
BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_\
##P##_##M##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
...
...
@@ -121,4 +125,6 @@ BM_DEPTHWISE_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 1);
BM_DEPTHWISE_CONV_2D
(
1
,
3
,
512
,
512
,
3
,
3
,
2
,
VALID
,
1
);
BM_DEPTHWISE_CONV_2D
(
1
,
3
,
512
,
512
,
3
,
3
,
2
,
SAME
,
1
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/depthwise_conv2d_test.cc
浏览文件 @
2944863e
...
...
@@ -5,9 +5,9 @@
#include "mace/ops/conv_2d.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
namespace
{
namespace
mace
{
namespace
ops
{
namespace
test
{
class
DepthwiseConv2dOpTest
:
public
OpsTestBase
{};
...
...
@@ -207,11 +207,10 @@ void TestNxNS12(const index_t height, const index_t width) {
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
5
;
index_t
input_channels
=
3
+
rand
()
%
16
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
5
;
index_t
input_channels
=
3
+
rand_r
(
&
seed
)
%
16
;
index_t
multiplier
=
1
;
// Construct graph
OpsTestNet
net
;
...
...
@@ -316,4 +315,6 @@ TEST_F(DepthwiseConv2dOpTest, OpenCLUnalignedNxNS12Half) {
TestNxNS12
<
DeviceType
::
OPENCL
,
half
>
(
107
,
113
);
}
}
// namespace
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/eltwise.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/eltwise.h"
namespace
mace
{
namespace
ops
{
void
Register_Eltwise
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Eltwise"
)
...
...
@@ -26,4 +27,5 @@ void Register_Eltwise(OperatorRegistry *op_registry) {
EltwiseOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/eltwise.h
浏览文件 @
2944863e
...
...
@@ -2,13 +2,14 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_
RESHAP
E_H_
#define MACE_OPS_
RESHAP
E_H_
#ifndef MACE_OPS_
ELTWIS
E_H_
#define MACE_OPS_
ELTWIS
E_H_
#include "mace/core/operator.h"
#include "mace/kernels/eltwise.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
EltwiseOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -44,6 +45,7 @@ class EltwiseOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_
RESHAP
E_H_
#endif // MACE_OPS_
ELTWIS
E_H_
mace/ops/eltwise_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -2,13 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/eltwise.h"
#include <string>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/eltwise.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
EltwiseBenchmark
(
int
iters
,
kernels
::
EltwiseType
type
,
int
n
,
int
h
,
int
w
,
int
c
)
{
...
...
@@ -81,4 +85,6 @@ BM_ELTWISE(0, 1, 240, 240, 256);
BM_ELTWISE
(
1
,
1
,
240
,
240
,
256
);
BM_ELTWISE
(
2
,
1
,
240
,
240
,
256
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/eltwise_test.cc
浏览文件 @
2944863e
...
...
@@ -7,6 +7,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
EltwiseOpTest
:
public
OpsTestBase
{};
...
...
@@ -170,4 +172,6 @@ TEST_F(EltwiseOpTest, OPENCLRandomHalf) {
{
13
,
32
,
32
,
64
});
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/folded_batch_norm.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/folded_batch_norm.h"
namespace
mace
{
namespace
ops
{
void
Register_FoldedBatchNorm
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FoldedBatchNorm"
)
...
...
@@ -26,4 +27,5 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
FoldedBatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/folded_batch_norm.h
浏览文件 @
2944863e
...
...
@@ -5,10 +5,13 @@
#ifndef MACE_OPS_FOLDED_BATCH_NORM_H_
#define MACE_OPS_FOLDED_BATCH_NORM_H_
#include <string>
#include "mace/core/operator.h"
#include "mace/kernels/batch_norm.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
FoldedBatchNormOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -48,6 +51,7 @@ class FoldedBatchNormOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_FOLDED_BATCH_NORM_H_
mace/ops/folded_batch_norm_test.cc
浏览文件 @
2944863e
...
...
@@ -6,6 +6,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
FoldedBatchNormOpTest
:
public
OpsTestBase
{};
...
...
@@ -14,12 +16,12 @@ void CalculateScaleOffset(const std::vector<float> &gamma,
const
std
::
vector
<
float
>
&
mean
,
const
std
::
vector
<
float
>
&
var
,
const
float
epsilon
,
std
::
vector
<
float
>
&
scale
,
std
::
vector
<
float
>
&
offset
)
{
std
::
vector
<
float
>
*
scale
,
std
::
vector
<
float
>
*
offset
)
{
size_t
size
=
gamma
.
size
();
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
scale
[
i
]
=
gamma
[
i
]
/
std
::
sqrt
(
var
[
i
]
+
epsilon
);
offset
[
i
]
=
offset
[
i
]
-
mean
[
i
]
*
scale
[
i
];
(
*
scale
)
[
i
]
=
gamma
[
i
]
/
std
::
sqrt
(
var
[
i
]
+
epsilon
);
(
*
offset
)[
i
]
=
(
*
offset
)[
i
]
-
mean
[
i
]
*
(
*
scale
)
[
i
];
}
}
...
...
@@ -32,7 +34,7 @@ void Simple() {
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
std
::
vector
<
float
>
scale
(
1
);
std
::
vector
<
float
>
offset
(
1
);
CalculateScaleOffset
({
4.0
f
},
{
2.0
},
{
10
},
{
11.67
f
},
1e-3
,
scale
,
offset
);
CalculateScaleOffset
({
4.0
f
},
{
2.0
},
{
10
},
{
11.67
f
},
1e-3
,
&
scale
,
&
offset
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
scale
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
offset
);
...
...
@@ -172,11 +174,10 @@ width});
*/
TEST_F
(
FoldedBatchNormOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
...
...
@@ -227,11 +228,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) {
}
TEST_F
(
FoldedBatchNormOpTest
,
SimpleRandomHalfOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
...
...
@@ -283,11 +283,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) {
}
TEST_F
(
FoldedBatchNormOpTest
,
ComplexRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
...
...
@@ -337,11 +336,10 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) {
}
TEST_F
(
FoldedBatchNormOpTest
,
ComplexRandomHalfOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
1
+
rand_r
(
&
seed
)
%
10
;
index_t
channels
=
3
+
rand_r
(
&
seed
)
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
...
...
@@ -390,4 +388,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) {
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/fully_connected.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/fully_connected.h"
namespace
mace
{
namespace
ops
{
void
Register_FullyConnected
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FC"
)
...
...
@@ -26,4 +27,5 @@ void Register_FullyConnected(OperatorRegistry *op_registry) {
FullyConnectedOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/fully_connected.h
浏览文件 @
2944863e
...
...
@@ -11,6 +11,7 @@
#include "mace/kernels/fully_connected.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
FullyConnectedOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -48,6 +49,7 @@ class FullyConnectedOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_FULLY_CONNECTED_H_
mace/ops/fully_connected_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -3,11 +3,14 @@
//
#include <string>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
FCBenchmark
(
...
...
@@ -84,4 +87,7 @@ BM_FC(1, 16, 16, 32, 32);
BM_FC
(
1
,
8
,
8
,
32
,
1000
);
BM_FC
(
1
,
2
,
2
,
512
,
2
);
BM_FC
(
1
,
7
,
7
,
512
,
4096
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/fully_connected_test.cc
浏览文件 @
2944863e
...
...
@@ -3,10 +3,13 @@
//
#include <fstream>
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
FullyConnectedOpTest
:
public
OpsTestBase
{};
...
...
@@ -263,4 +266,6 @@ TEST_F(FullyConnectedOpTest, OPENCLHalfWidthFormatAligned) {
TestWXFormat
<
half
>
(
1
,
16
,
32
,
32
,
32
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/fused_conv_2d.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/fused_conv_2d.h"
namespace
mace
{
namespace
ops
{
void
Register_FusedConv2D
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
...
...
@@ -26,4 +27,5 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) {
FusedConv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/fused_conv_2d.h
浏览文件 @
2944863e
...
...
@@ -13,6 +13,7 @@
#include "mace/ops/conv_pool_2d_base.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
FusedConv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
...
...
@@ -47,6 +48,7 @@ class FusedConv2dOp : public ConvPool2dOpBase<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_FUSED_CONV_2D_H_
mace/ops/fused_conv_2d_test.cc
浏览文件 @
2944863e
...
...
@@ -8,6 +8,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
FusedConv2dOpTest
:
public
OpsTestBase
{};
...
...
@@ -276,9 +278,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
unsigned
int
seed
=
time
(
NULL
);
// generate random input
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
3
+
(
rand_r
(
&
seed
)
%
10
);
index_t
height
=
shape
[
0
];
index_t
width
=
shape
[
1
];
...
...
@@ -352,9 +353,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
unsigned
int
seed
=
time
(
NULL
);
// generate random input
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
3
+
(
rand_r
(
&
seed
)
%
10
);
index_t
height
=
shape
[
0
];
index_t
width
=
shape
[
1
];
...
...
@@ -679,4 +679,6 @@ TEST_F(FusedConv2dOpTest, OPENCL15X15AtrousConvD4) {
{
2
,
2
});
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/global_avg_pooling.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/global_avg_pooling.h"
namespace
mace
{
namespace
ops
{
void
Register_GlobalAvgPooling
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"GlobalAvgPooling"
)
...
...
@@ -14,4 +15,5 @@ void Register_GlobalAvgPooling(OperatorRegistry *op_registry) {
GlobalAvgPoolingOp
<
DeviceType
::
CPU
,
float
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/global_avg_pooling.h
浏览文件 @
2944863e
...
...
@@ -11,6 +11,7 @@
#include "mace/kernels/global_avg_pooling.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
GlobalAvgPoolingOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -40,6 +41,7 @@ class GlobalAvgPoolingOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_GLOBAL_AVG_POOLING_H_
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -8,7 +8,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
kernels
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
>
static
void
GlobalAvgPooling
(
...
...
@@ -54,5 +55,6 @@ BM_GLOBAL_AVG_POOLING(1, 3, 7, 7);
BM_GLOBAL_AVG_POOLING
(
1
,
3
,
64
,
64
);
BM_GLOBAL_AVG_POOLING
(
1
,
3
,
256
,
256
);
}
// namespace kernels
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/global_avg_pooling_test.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
GlobalAvgPoolingOpTest
:
public
OpsTestBase
{};
...
...
@@ -32,4 +34,6 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/image_to_buffer.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/image_to_buffer.h"
namespace
mace
{
namespace
ops
{
void
Register_ImageToBuffer
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ImageToBuffer"
)
...
...
@@ -20,4 +21,5 @@ void Register_ImageToBuffer(OperatorRegistry *op_registry) {
ImageToBufferOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/image_to_buffer.h
浏览文件 @
2944863e
...
...
@@ -9,6 +9,7 @@
#include "mace/kernels/buffer_to_image.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
ImageToBufferOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -35,5 +36,7 @@ class ImageToBufferOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_IMAGE_TO_BUFFER_H_
mace/ops/matmul.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/matmul.h"
namespace
mace
{
namespace
ops
{
void
Register_MatMul
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"MatMul"
)
...
...
@@ -26,4 +27,5 @@ void Register_MatMul(OperatorRegistry *op_registry) {
MatMulOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/matmul.h
浏览文件 @
2944863e
...
...
@@ -9,6 +9,7 @@
#include "mace/kernels/matmul.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
MatMulOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -35,6 +36,7 @@ class MatMulOp : public Operator<D, T> {
kernels
::
MatMulFunctor
<
D
,
T
>
functor_
;
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_MATMUL_H_
mace/ops/matmul_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -3,11 +3,15 @@
//
#include <string>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
MatMulBenchmark
(
int
iters
,
int
batch
,
int
height
,
int
channels
,
int
out_width
)
{
...
...
@@ -72,4 +76,7 @@ BM_MATMUL(16, 32, 128, 3969);
BM_MATMUL
(
16
,
128
,
128
,
49
);
BM_MATMUL
(
16
,
128
,
128
,
961
);
BM_MATMUL
(
16
,
128
,
128
,
3969
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/matmul_test.cc
浏览文件 @
2944863e
...
...
@@ -3,10 +3,13 @@
//
#include <fstream>
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
MatMulOpTest
:
public
OpsTestBase
{};
...
...
@@ -170,4 +173,7 @@ TEST_F(MatMulOpTest, OPENCLHalfUnAlignedWithBatch) {
Complex
<
half
>
(
16
,
32
,
64
,
64
);
Complex
<
half
>
(
31
,
31
,
61
,
67
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/ops_test_util.h
浏览文件 @
2944863e
...
...
@@ -22,6 +22,8 @@
#include "mace/utils/utils.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
OpDefBuilder
{
public:
...
...
@@ -423,6 +425,8 @@ void ImageToBuffer(OpsTestNet *net,
net
->
Sync
();
}
}
// namespace test
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_OPS_TEST_UTIL_H_
mace/ops/pooling.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/pooling.h"
namespace
mace
{
namespace
ops
{
void
Register_Pooling
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
...
...
@@ -30,4 +31,5 @@ void Register_Pooling(OperatorRegistry *op_registry) {
PoolingOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/pooling.h
浏览文件 @
2944863e
...
...
@@ -12,6 +12,7 @@
#include "mace/ops/conv_pool_2d_base.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
PoolingOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
...
...
@@ -46,6 +47,7 @@ class PoolingOp : public ConvPool2dOpBase<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_POOLING_H_
mace/ops/pooling_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -9,7 +9,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
kernels
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
>
static
void
Pooling
(
int
iters
,
...
...
@@ -73,5 +74,6 @@ BM_POOLING(1, 3, 257, 257, 2, 2, SAME, MAX);
BM_POOLING
(
1
,
3
,
513
,
513
,
2
,
2
,
SAME
,
MAX
);
BM_POOLING
(
1
,
3
,
1025
,
1025
,
2
,
2
,
SAME
,
MAX
);
}
// namespace kernels
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/pooling_test.cc
浏览文件 @
2944863e
...
...
@@ -10,6 +10,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
PoolingOpTest
:
public
OpsTestBase
{};
...
...
@@ -394,4 +396,6 @@ TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) {
Padding
::
SAME
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/reshape.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/reshape.h"
namespace
mace
{
namespace
ops
{
void
Register_Reshape
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Reshape"
)
...
...
@@ -14,4 +15,5 @@ void Register_Reshape(OperatorRegistry *op_registry) {
ReshapeOp
<
DeviceType
::
CPU
,
float
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/reshape.h
浏览文件 @
2944863e
...
...
@@ -11,6 +11,7 @@
#include "mace/kernels/reshape.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
ReshapeOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -63,6 +64,7 @@ class ReshapeOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_RESHAPE_H_
mace/ops/reshape_test.cc
浏览文件 @
2944863e
...
...
@@ -7,6 +7,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
ReshapeTest
:
public
OpsTestBase
{};
...
...
@@ -54,4 +56,6 @@ TEST_F(ReshapeTest, Complex) {
TestReshape
({
1
,
2
,
3
,
4
},
{
1
,
3
,
8
},
{
1
,
3
,
8
});
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/resize_bilinear.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/resize_bilinear.h"
namespace
mace
{
namespace
ops
{
void
Register_ResizeBilinear
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
...
...
@@ -26,4 +27,5 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) {
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/resize_bilinear.h
浏览文件 @
2944863e
...
...
@@ -9,6 +9,7 @@
#include "mace/kernels/resize_bilinear.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
ResizeBilinearOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -34,6 +35,7 @@ class ResizeBilinearOp : public Operator<D, T> {
kernels
::
ResizeBilinearFunctor
<
D
,
T
>
functor_
;
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_RESIZE_BILINEAR_H_
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -8,6 +8,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
ResizeBilinearBenchmark
(
int
iters
,
int
batch
,
...
...
@@ -86,4 +89,6 @@ BM_RESIZE_BILINEAR(1, 128, 240, 240, 480, 480);
BM_RESIZE_BILINEAR
(
1
,
3
,
4032
,
3016
,
480
,
480
);
BM_RESIZE_BILINEAR
(
1
,
3
,
480
,
480
,
4032
,
3016
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/resize_bilinear_test.cc
浏览文件 @
2944863e
...
...
@@ -4,11 +4,13 @@
#include <vector>
#include "mace/ops/resize_bilinear.h"
#include "mace/core/operator.h"
#include "mace/ops/resize_bilinear.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
ResizeBilinearTest
:
public
OpsTestBase
{};
...
...
@@ -63,9 +65,8 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) {
template
<
DeviceType
D
>
void
TestRandomResizeBilinear
()
{
unsigned
int
seed
=
time
(
nullptr
);
testing
::
internal
::
LogToStderr
();
static
unsigned
int
seed
=
time
(
NULL
);
for
(
int
round
=
0
;
round
<
10
;
++
round
)
{
int
batch
=
1
+
rand_r
(
&
seed
)
%
5
;
int
channels
=
1
+
rand_r
(
&
seed
)
%
100
;
...
...
@@ -108,7 +109,7 @@ void TestRandomResizeBilinear() {
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"DeviceOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
}
else
{
// TODO(
yejianwu)
support NEON
// TODO(
someone):
support NEON
}
// Check
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"DeviceOutput"
),
0.001
);
...
...
@@ -125,4 +126,6 @@ TEST_F(ResizeBilinearTest, OPENCLRandomResizeBilinear) {
TestRandomResizeBilinear
<
DeviceType
::
OPENCL
>
();
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/slice.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/slice.h"
namespace
mace
{
namespace
ops
{
void
Register_Slice
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Slice"
)
...
...
@@ -25,4 +26,5 @@ void Register_Slice(OperatorRegistry *op_registry) {
SliceOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/slice.h
浏览文件 @
2944863e
...
...
@@ -9,7 +9,9 @@
#include "mace/core/operator.h"
#include "mace/kernels/slice.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
SliceOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -36,6 +38,7 @@ class SliceOp : public Operator<D, T> {
OP_INPUT_TAGS
(
INPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_SLICE_H_
mace/ops/slice_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -7,6 +7,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
BMSliceHelper
(
int
iters
,
const
std
::
vector
<
index_t
>
&
input_shape
,
...
...
@@ -79,5 +82,6 @@ BM_SLICE(1, 32, 32, 256, 2);
BM_SLICE
(
1
,
128
,
128
,
32
,
2
);
BM_SLICE
(
1
,
128
,
128
,
128
,
2
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/slice_test.cc
浏览文件 @
2944863e
...
...
@@ -5,17 +5,19 @@
#include <functional>
#include <vector>
#include "gmock/gmock.h"
#include "mace/ops/slice.h"
#include "mace/ops/ops_test_util.h"
#include "gmock/gmock.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
SliceOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
,
typename
T
>
void
RandomTest
(
const
int
num_outputs
)
{
unsigned
int
seed
=
time
(
nullptr
);
static
unsigned
int
seed
=
time
(
NULL
);
const
index_t
output_channels
=
4
*
(
1
+
rand_r
(
&
seed
)
%
10
);
const
index_t
input_channels
=
num_outputs
*
output_channels
;
const
index_t
batch
=
3
+
(
rand_r
(
&
seed
)
%
10
);
...
...
@@ -108,4 +110,6 @@ TEST_F(SliceOpTest, OPENCLHalf) {
RandomTest
<
DeviceType
::
OPENCL
,
half
>
(
11
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/softmax.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/softmax.h"
namespace
mace
{
namespace
ops
{
void
Register_Softmax
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Softmax"
)
...
...
@@ -26,4 +27,5 @@ void Register_Softmax(OperatorRegistry *op_registry) {
SoftmaxOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/softmax.h
浏览文件 @
2944863e
...
...
@@ -9,6 +9,7 @@
#include "mace/kernels/softmax.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
class
T
>
class
SoftmaxOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -34,6 +35,7 @@ class SoftmaxOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_SOFTMAX_H_
mace/ops/softmax_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -3,11 +3,15 @@
//
#include <string>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
SoftmaxBenchmark
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
...
...
@@ -66,4 +70,7 @@ BM_SOFTMAX(1, 3, 512, 512);
BM_SOFTMAX
(
1
,
4
,
512
,
512
);
BM_SOFTMAX
(
1
,
10
,
256
,
256
);
BM_SOFTMAX
(
1
,
1024
,
7
,
7
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/softmax_test.cc
浏览文件 @
2944863e
...
...
@@ -6,6 +6,8 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
SoftmaxOpTest
:
public
OpsTestBase
{};
...
...
@@ -102,4 +104,6 @@ TEST_F(SoftmaxOpTest, OPENCLUnAligned) {
Complex
<
DeviceType
::
OPENCL
>
({
5
,
211
,
107
,
1
});
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/space_to_batch.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/space_to_batch.h"
namespace
mace
{
namespace
ops
{
void
Register_SpaceToBatchND
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
...
...
@@ -19,4 +20,5 @@ void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/space_to_batch.h
浏览文件 @
2944863e
...
...
@@ -12,6 +12,7 @@
#include "mace/kernels/space_to_batch.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
SpaceToBatchNDOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -72,6 +73,7 @@ class SpaceToBatchNDOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_SPACE_TO_BATCH_H_
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -7,6 +7,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
BMSpaceToBatch
(
int
iters
,
int
batch
,
int
height
,
int
width
,
int
channels
,
int
shape
)
{
...
...
@@ -55,4 +58,7 @@ static void BMSpaceToBatch(
BM_SPACE_TO_BATCH
(
128
,
16
,
16
,
128
,
2
);
BM_SPACE_TO_BATCH
(
1
,
256
,
256
,
32
,
2
);
BM_SPACE_TO_BATCH
(
1
,
256
,
256
,
32
,
4
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/space_to_batch_test.cc
浏览文件 @
2944863e
...
...
@@ -3,10 +3,13 @@
//
#include <fstream>
#include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
>
void
RunSpaceToBatch
(
const
std
::
vector
<
index_t
>
&
input_shape
,
...
...
@@ -217,4 +220,6 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// space_tensor.get());
//}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/winograd_convolution_test.cc
浏览文件 @
2944863e
...
...
@@ -3,11 +3,14 @@
//
#include <fstream>
#include "mace/core/operator.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
WinogradConvlutionTest
:
public
OpsTestBase
{};
...
...
@@ -40,7 +43,7 @@ void WinogradConvolution(const index_t batch,
const
index_t
in_channels
,
const
index_t
out_channels
,
const
Padding
padding
)
{
srand
(
time
(
NULL
));
//
srand(time(NULL));
// Construct graph
OpsTestNet
net
;
...
...
@@ -157,7 +160,7 @@ void WinogradConvolutionWithPad(const index_t batch,
const
index_t
in_channels
,
const
index_t
out_channels
,
const
int
padding
)
{
srand
(
time
(
NULL
));
//
srand(time(NULL));
// Construct graph
OpsTestNet
net
;
...
...
@@ -246,9 +249,6 @@ void WinogradConvolutionWithPad(const index_t batch,
}
}
TEST_F
(
WinogradConvlutionTest
,
UnAlignedConvolutionPad2
)
{
WinogradConvolutionWithPad
<
DeviceType
::
OPENCL
,
float
>
(
1
,
64
,
64
,
40
,
19
,
2
);
WinogradConvolutionWithPad
<
DeviceType
::
OPENCL
,
float
>
(
1
,
32
,
32
,
96
,
109
,
2
);
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/winograd_inverse_transform.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/winograd_inverse_transform.h"
namespace
mace
{
namespace
ops
{
void
Register_WinogradInverseTransform
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"WinogradInverseTransform"
)
...
...
@@ -19,4 +20,5 @@ void Register_WinogradInverseTransform(OperatorRegistry *op_registry) {
WinogradInverseTransformOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/winograd_inverse_transform.h
浏览文件 @
2944863e
...
...
@@ -13,6 +13,7 @@
#include "mace/kernels/winograd_transform.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
WinogradInverseTransformOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -43,6 +44,7 @@ class WinogradInverseTransformOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_WINOGRAD_INVERSE_TRANSFORM_H_
mace/ops/winograd_transform.cc
浏览文件 @
2944863e
...
...
@@ -5,6 +5,7 @@
#include "mace/ops/winograd_transform.h"
namespace
mace
{
namespace
ops
{
void
Register_WinogradTransform
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"WinogradTransform"
)
...
...
@@ -19,4 +20,5 @@ void Register_WinogradTransform(OperatorRegistry *op_registry) {
WinogradTransformOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace ops
}
// namespace mace
mace/ops/winograd_transform.h
浏览文件 @
2944863e
...
...
@@ -11,6 +11,7 @@
#include "mace/kernels/winograd_transform.h"
namespace
mace
{
namespace
ops
{
template
<
DeviceType
D
,
typename
T
>
class
WinogradTransformOp
:
public
Operator
<
D
,
T
>
{
...
...
@@ -37,6 +38,7 @@ class WinogradTransformOp : public Operator<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_WINOGRAD_TRANSFORM_H_
mace/ops/winograd_transform_benchmark.cc
浏览文件 @
2944863e
...
...
@@ -7,6 +7,9 @@
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
BMWinogradTransform
(
int
iters
,
int
batch
,
int
height
,
int
width
,
int
channels
)
{
...
...
@@ -105,4 +108,6 @@ BM_WINOGRAD_INVERSE_TRANSFORM(1, 14, 14, 32);
BM_WINOGRAD_INVERSE_TRANSFORM
(
1
,
62
,
62
,
32
);
BM_WINOGRAD_INVERSE_TRANSFORM
(
1
,
126
,
126
,
32
);
}
// namespace test
}
// namespace ops
}
// namespace mace
tools/benchmark.sh
浏览文件 @
2944863e
...
...
@@ -18,7 +18,13 @@ OPTION_ARGS=$3
echo
$OPTION_ARGS
DEVICE_ID
=
`
echo_device_id_by_soc
$TARGET_SOC
`
RESULT_VALUE
=
`
echo_device_id_by_soc
$TARGET_SOC
`
if
[
$?
-ne
0
]
;
then
echo
$RESULT_VALUE
exit
1
else
DEVICE_ID
=
$RESULT_VALUE
fi
if
[
-f
"
$MODEL_OUTPUT_DIR
/benchmark_model"
]
;
then
rm
-rf
$MODEL_OUTPUT_DIR
/benchmark_model
...
...
tools/build_run_throughput_test.sh
浏览文件 @
2944863e
...
...
@@ -17,7 +17,13 @@ RUN_SECONDS=$2
MERGED_LIB_FILE
=
$3
MODEL_INPUT_DIR
=
$4
DEVICE_ID
=
`
echo_device_id_by_soc
$TARGET_SOC
`
RESULT_VALUE
=
`
echo_device_id_by_soc
$TARGET_SOC
`
if
[
$?
-ne
0
]
;
then
echo
$RESULT_VALUE
exit
1
else
DEVICE_ID
=
$RESULT_VALUE
fi
if
[
"
$CPU_MODEL_TAG
"
!=
''
]
;
then
CPU_MODEL_TAG_BUILD_FLAGS
=
"--copt=-DMACE_CPU_MODEL_TAG=
${
CPU_MODEL_TAG
}
"
...
...
tools/clear_env.sh
浏览文件 @
2944863e
...
...
@@ -13,7 +13,13 @@ CURRENT_DIR=`dirname $0`
source
${
CURRENT_DIR
}
/env.sh
TARGET_SOC
=
$1
DEVICE_ID
=
`
echo_device_id_by_soc
$TARGET_SOC
`
RESULT_VALUE
=
`
echo_device_id_by_soc
$TARGET_SOC
`
if
[
$?
-ne
0
]
;
then
echo
$RESULT_VALUE
exit
1
else
DEVICE_ID
=
$RESULT_VALUE
fi
if
[
x
"
$TARGET_ABI
"
!=
x
"host"
]
;
then
adb
-s
$DEVICE_ID
shell
rm
-rf
$PHONE_DATA_DIR
||
exit
1
...
...
tools/env.sh
浏览文件 @
2944863e
...
...
@@ -40,6 +40,10 @@ echo_device_id_by_soc()
device_soc
=
`
adb
-s
${
device
}
shell getprop |
grep
ro.board.platform |
cut
-d
[
-f3
|
cut
-d
]
-f1
`
if
[
x
"
$TARGET_SOC
"
=
x
"
$device_soc
"
]
;
then
echo
"
$device
"
return
0
fi
done
echo
"MACE ERROR: Not found device with soc
${
TARGET_SOC
}
"
return
1
}
tools/generate_production_code.sh
浏览文件 @
2944863e
...
...
@@ -16,7 +16,13 @@ TARGET_SOC=$1
CL_BIN_DIRS
=
$2
PULL_OR_NOT
=
$3
DEVICE_ID
=
`
echo_device_id_by_soc
$TARGET_SOC
`
RESULT_VALUE
=
`
echo_device_id_by_soc
$TARGET_SOC
`
if
[
$?
-ne
0
]
;
then
echo
$RESULT_VALUE
exit
1
else
DEVICE_ID
=
$RESULT_VALUE
fi
if
[
"
$PULL_OR_NOT
"
=
1
]
;
then
CL_BIN_DIR
=
${
CL_BIN_DIRS
}
...
...
tools/tuning_run.sh
浏览文件 @
2944863e
...
...
@@ -22,7 +22,13 @@ OPTION_ARGS=$7
echo
$OPTION_ARGS
DEVICE_ID
=
`
echo_device_id_by_soc
$TARGET_SOC
`
RESULT_VALUE
=
`
echo_device_id_by_soc
$TARGET_SOC
`
if
[
$?
-ne
0
]
;
then
echo
$RESULT_VALUE
exit
1
else
DEVICE_ID
=
$RESULT_VALUE
fi
if
[
x
"
$TARGET_ABI
"
=
x
"host"
]
;
then
MACE_CPP_MIN_VLOG_LEVEL
=
$VLOG_LEVEL
\
...
...
tools/validate_tools.sh
浏览文件 @
2944863e
...
...
@@ -16,7 +16,13 @@ TARGET_SOC=$1
MODEL_OUTPUT_DIR
=
$2
GENERATE_DATA_OR_NOT
=
$3
DEVICE_ID
=
`
echo_device_id_by_soc
$TARGET_SOC
`
RESULT_VALUE
=
`
echo_device_id_by_soc
$TARGET_SOC
`
if
[
$?
-ne
0
]
;
then
echo
$RESULT_VALUE
exit
1
else
DEVICE_ID
=
$RESULT_VALUE
fi
IFS
=
','
read
-r
-a
INPUT_NAMES
<<<
"
${
INPUT_NODES
}
"
IFS
=
','
read
-r
-a
OUTPUT_NAMES
<<<
"
${
OUTPUT_NODES
}
"
...
...
tools/wino_conv.py
浏览文件 @
2944863e
...
...
@@ -2,22 +2,89 @@ import numpy as np
import
math
import
tensorflow
as
tf
A_T
=
np
.
array
([[
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
-
1
]]).
astype
(
np
.
float32
)
A
=
np
.
transpose
(
A_T
)
B_T
=
np
.
array
([
A_T
=
{}
A
=
{}
B_T
=
{}
B
=
{}
G
=
{}
G_T
=
{}
# f(2, 3)
A_T
[
4
]
=
np
.
array
([[
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
-
1
]]).
astype
(
np
.
float32
)
A
[
4
]
=
np
.
transpose
(
A_T
[
4
])
B_T
[
4
]
=
np
.
array
([
[
1
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
0
],
[
0
,
1
,
0
,
-
1
]
]).
astype
(
np
.
float32
)
B
=
np
.
transpose
(
B_T
)
G
=
np
.
array
([
B
[
4
]
=
np
.
transpose
(
B_T
[
4
]
)
G
[
4
]
=
np
.
array
([
[
1
,
0
,
0
],
[
0.5
,
0.5
,
0.5
],
[
0.5
,
-
0.5
,
0.5
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
=
np
.
transpose
(
G
)
G_T
[
4
]
=
np
.
transpose
(
G
[
4
])
# f(4, 3)
A_T
[
6
]
=
np
.
array
([
[
1
,
1
,
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
2
,
-
2
,
0
],
[
0
,
1
,
1
,
4
,
4
,
0
],
[
0
,
1
,
-
1
,
8
,
-
8
,
1
],
]).
astype
(
np
.
float32
)
A
[
6
]
=
np
.
transpose
(
A_T
[
6
])
B_T
[
6
]
=
np
.
array
([
[
4
,
0
,
-
5
,
0
,
1
,
0
],
[
0
,
-
4
,
-
4
,
1
,
1
,
0
],
[
0
,
4
,
-
4
,
-
1
,
1
,
0
],
[
0
,
-
2
,
-
1
,
2
,
1
,
0
],
[
0
,
2
,
-
1
,
-
2
,
1
,
0
],
[
0
,
4
,
0
,
-
5
,
0
,
1
],
]).
astype
(
np
.
float32
)
B
[
6
]
=
np
.
transpose
(
B_T
[
6
])
G
[
6
]
=
np
.
array
([
[
1
/
4.0
,
0
,
0
],
[
-
1
/
6.0
,
-
1
/
6.0
,
-
1
/
6.0
],
[
-
1
/
6.0
,
1
/
6.0
,
-
1
/
6.0
],
[
1
/
24.0
,
1
/
12.0
,
1
/
6.0
],
[
1
/
24.0
,
-
1
/
12.0
,
1
/
6.0
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
[
6
]
=
np
.
transpose
(
G
[
6
])
# f(6, 3)
A_T
[
8
]
=
np
.
array
([
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
2
,
-
2
,
1
/
2.
,
-
1
/
2.
,
0
],
[
0
,
1
,
1
,
4
,
4
,
1
/
4.
,
1
/
4.
,
0
],
[
0
,
1
,
-
1
,
8
,
-
8
,
1
/
8.
,
-
1
/
8.
,
0
],
[
0
,
1
,
1
,
16
,
16
,
1
/
16.
,
1
/
16.
,
0
],
[
0
,
1
,
-
1
,
32
,
-
32
,
1
/
32.
,
-
1
/
32.
,
1
],
]).
astype
(
np
.
float32
)
A
[
8
]
=
np
.
transpose
(
A_T
[
8
])
B_T
[
8
]
=
np
.
array
([
[
1
,
0
,
-
21
/
4.
,
0
,
21
/
4.
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
-
17
/
4.
,
-
17
/
4.
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
17
/
4.
,
-
17
/
4.
,
-
1
,
1
,
0
],
[
0
,
1
/
2.
,
1
/
4.
,
-
5
/
2.
,
-
5
/
4.
,
2
,
1
,
0
],
[
0
,
-
1
/
2.
,
1
/
4.
,
5
/
2.
,
-
5
/
4.
,
-
2
,
1
,
0
],
[
0
,
2
,
4
,
-
5
/
2.
,
-
5
,
1
/
2.
,
1
,
0
],
[
0
,
-
2
,
4
,
5
/
2.
,
-
5
,
-
1
/
2.
,
1
,
0
],
[
0
,
-
1
,
0
,
21
/
4.
,
0
,
-
21
/
4.
,
0
,
1
],
]).
astype
(
np
.
float32
)
B
[
8
]
=
np
.
transpose
(
B_T
[
8
])
G
[
8
]
=
np
.
array
([
[
1
,
0
,
0
],
[
-
2
/
9.
,
-
2
/
9.
,
-
2
/
9.
],
[
-
2
/
9.
,
2
/
9.
,
-
2
/
9.
],
[
1
/
90.
,
1
/
45.
,
2
/
45.
],
[
1
/
90.
,
-
1
/
45.
,
2
/
45.
],
[
32
/
45.
,
16
/
45.
,
8
/
45.
],
[
32
/
45.
,
-
16
/
45.
,
8
/
45.
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
[
8
]
=
np
.
transpose
(
G
[
8
])
def
output_shape
(
input_shape
,
filter_shape
):
...
...
@@ -29,55 +96,54 @@ def output_shape(input_shape, filter_shape):
return
out_shape
def
winog_conv
(
input
,
filter
):
m
=
2
r
=
3
def
winograd_conv
(
m
,
r
,
input
,
filter
):
alpha
=
m
+
r
-
1
print
'Winograd(m = %d, r = %d, tile size=%d'
%
(
m
,
r
,
alpha
)
alpha_square
=
alpha
*
alpha
input_shape
=
input
.
shape
filter_shape
=
filter
.
shape
out_shape
=
output_shape
(
input_shape
,
filter_shape
)
K
=
filter_shape
[
0
]
C
=
input_shape
[
1
]
U
=
np
.
zeros
((
K
*
16
,
C
))
U
=
np
.
zeros
((
K
*
alpha_square
,
C
))
for
k
in
range
(
K
):
for
c
in
range
(
C
):
u
=
np
.
dot
(
np
.
dot
(
G
,
filter
[
k
,
c
,
:,
:]),
G_T
)
for
i
in
range
(
4
):
for
j
in
range
(
4
)
:
U
[(
i
*
4
+
j
)
*
K
+
k
,
c
]
=
u
[
i
,
j
]
u
=
np
.
dot
(
np
.
dot
(
G
[
alpha
],
filter
[
k
,
c
,
:,
:]),
G_T
[
alpha
]
)
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
)
:
U
[(
i
*
alpha
+
j
)
*
K
+
k
,
c
]
=
u
[
i
,
j
]
print
'filter out: '
,
U
.
shape
print
U
[
0
,
0
]
U
.
astype
(
np
.
float32
).
tofile
(
"filter_out"
)
rounded_h
=
int
(
math
.
ceil
(
out_shape
[
2
]
/
2.0
))
rounded_w
=
int
(
math
.
ceil
(
out_shape
[
3
]
/
2.0
))
rounded_h
=
int
(
math
.
ceil
(
out_shape
[
2
]
/
(
m
*
1.0
)
))
rounded_w
=
int
(
math
.
ceil
(
out_shape
[
3
]
/
(
m
*
1.0
)
))
P
=
input_shape
[
0
]
*
rounded_h
*
rounded_w
V
=
np
.
zeros
((
C
*
16
,
P
))
V
=
np
.
zeros
((
C
*
alpha_square
,
P
))
for
p
in
range
(
P
):
for
c
in
range
(
C
):
n
=
p
/
(
rounded_w
*
rounded_h
)
t
=
p
%
(
rounded_h
*
rounded_w
)
h_idx
=
t
/
rounded_w
w_idx
=
t
%
rounded_w
h_start
=
h_idx
*
2
w_start
=
w_idx
*
2
h_end
=
min
(
h_start
+
4
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
4
,
input_shape
[
3
])
d
=
np
.
zeros
((
4
,
4
))
d
[
0
:
h_end
-
h_start
,
0
:
w_end
-
w_start
]
=
input
[
n
,
c
,
h_start
:
h_end
,
w_start
:
w_end
]
v
=
np
.
dot
(
np
.
dot
(
B_T
,
d
),
B
)
for
i
in
range
(
4
):
for
j
in
range
(
4
):
V
[(
i
*
4
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
tmp
=
V
.
reshape
(
16
,
C
,
P
,
1
)
h_start
=
h_idx
*
m
w_start
=
w_idx
*
m
h_end
=
min
(
h_start
+
alpha
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
alpha
,
input_shape
[
3
])
d
=
np
.
zeros
((
alpha
,
alpha
))
d
[
0
:
h_end
-
h_start
,
0
:
w_end
-
w_start
]
=
\
input
[
n
,
c
,
h_start
:
h_end
,
w_start
:
w_end
]
v
=
np
.
dot
(
np
.
dot
(
B_T
[
alpha
],
d
),
B
[
alpha
])
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
V
[(
i
*
alpha
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
tmp
=
V
.
reshape
(
alpha_square
,
C
,
P
,
1
)
print
'input out: '
,
tmp
.
shape
tmp
.
astype
(
np
.
float32
).
tofile
(
"C"
)
M
=
np
.
zeros
((
16
*
K
,
P
))
for
i
in
range
(
alpha
*
alpha
):
M
=
np
.
zeros
((
alpha_square
*
K
,
P
))
for
i
in
range
(
alpha
_square
):
u
=
U
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
v
=
V
[
i
*
C
:
(
i
+
1
)
*
C
,
:]
M
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
=
np
.
dot
(
u
,
v
)
...
...
@@ -87,17 +153,17 @@ def winog_conv(input, filter):
res
=
np
.
zeros
((
out_shape
[
0
],
out_shape
[
2
],
out_shape
[
3
],
out_shape
[
1
]))
for
k
in
range
(
K
):
for
b
in
range
(
P
):
m
=
np
.
zeros
((
4
,
4
))
for
i
in
range
(
4
):
for
j
in
range
(
4
):
m
[
i
][
j
]
=
M
[(
i
*
4
+
j
)
*
K
+
k
,
b
]
y
=
np
.
dot
(
np
.
dot
(
A_T
,
m
),
A
)
for
i
in
range
(
2
):
for
j
in
range
(
2
):
tm
=
np
.
zeros
((
alpha
,
alpha
))
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
tm
[
i
][
j
]
=
M
[(
i
*
alpha
+
j
)
*
K
+
k
,
b
]
y
=
np
.
dot
(
np
.
dot
(
A_T
[
alpha
],
tm
),
A
[
alpha
]
)
for
i
in
range
(
m
):
for
j
in
range
(
m
):
n
=
b
/
(
rounded_h
*
rounded_w
)
t
=
b
%
(
rounded_h
*
rounded_w
)
p
=
(
t
/
rounded_w
)
*
2
+
i
q
=
(
t
%
rounded_w
)
*
2
+
j
p
=
(
t
/
rounded_w
)
*
m
+
i
q
=
(
t
%
rounded_w
)
*
m
+
j
if
p
>=
out_shape
[
2
]
or
q
>=
out_shape
[
3
]:
continue
res
[
n
,
p
,
q
,
k
]
=
y
[
i
,
j
]
...
...
@@ -115,25 +181,27 @@ def tf_conv(input, filter):
def
main
():
input
=
np
.
random
.
random
([
7
,
61
,
71
,
31
]).
astype
(
np
.
float32
)
input
=
np
.
random
.
random
([
5
,
23
,
29
,
15
]).
astype
(
np
.
float32
)
# input = np.fromfile(file="A", dtype=np.float32)
# input = input.reshape(1, 3, 3, 5)
print
'input shape: '
,
input
.
shape
input
.
tofile
(
"A"
)
filter
=
np
.
random
.
random
([
3
,
3
,
31
,
31
]).
astype
(
np
.
float32
)
#
input.tofile("A")
filter
=
np
.
random
.
random
([
3
,
3
,
15
,
13
]).
astype
(
np
.
float32
)
tf_out
=
tf_conv
(
input
,
filter
)
input
=
input
.
transpose
((
0
,
3
,
1
,
2
))
filter
=
filter
.
transpose
((
3
,
2
,
0
,
1
))
print
'filter shape: '
,
filter
.
shape
filter
.
tofile
(
"filter_in"
)
winog_out
=
winog_conv
(
input
,
filter
)
res
=
np
.
allclose
(
tf_out
,
winog_out
)
if
res
:
print
"=========Pass========="
else
:
print
"=========Failed========="
print
"TF: "
,
tf_out
print
"Winograd: "
,
winog_out
# filter.tofile("filter_in")
for
i
in
[
2
,
4
,
6
]:
print
"==========f(%d,3)=========="
%
i
winograd_out
=
winograd_conv
(
i
,
3
,
input
,
filter
)
res
=
np
.
allclose
(
tf_out
,
winograd_out
)
if
res
:
print
"=========Pass========="
else
:
print
"=========Failed======="
print
"TF: "
,
tf_out
print
"Winograd: "
,
winograd_out
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录