Xiaomi / Mace
Commit 79e6e50b
Authored Mar 05, 2018 by liuqi
Parent: de12a6df

Fix PReLU bug: not fuse PReLU anymore.

Showing 34 changed files with 155 additions and 204 deletions (+155 −204)
mace/kernels/activation.h  +39 −26
mace/kernels/batch_norm.h  +7 −12
mace/kernels/conv_2d.h  +7 −14
mace/kernels/depthwise_conv2d.h  +7 −14
mace/kernels/fully_connected.h  +7 −12
mace/kernels/opencl/activation_opencl.cc  +4 −1
mace/kernels/opencl/batch_norm_opencl.cc  +0 −4
mace/kernels/opencl/cl/activation.cl  +9 −2
mace/kernels/opencl/cl/batch_norm.cl  +3 −4
mace/kernels/opencl/cl/common.h  +4 −2
mace/kernels/opencl/cl/conv_2d.cl  +5 −6
mace/kernels/opencl/cl/conv_2d_1x1.cl  +5 −6
mace/kernels/opencl/cl/conv_2d_3x3.cl  +6 −7
mace/kernels/opencl/cl/depthwise_conv2d.cl  +10 −12
mace/kernels/opencl/cl/fully_connected.cl  +3 −4
mace/kernels/opencl/cl/winograd_transform.cl  +6 −9
mace/kernels/opencl/conv_2d_opencl.cc  +3 −8
mace/kernels/opencl/conv_2d_opencl_1x1.cc  +0 −5
mace/kernels/opencl/conv_2d_opencl_3x3.cc  +0 −5
mace/kernels/opencl/conv_2d_opencl_general.cc  +0 −5
mace/kernels/opencl/depthwise_conv_opencl.cc  +3 −8
mace/kernels/opencl/fully_connected_opencl.cc  +0 −4
mace/kernels/opencl/winograd_transform.cc  +0 −1
mace/kernels/winograd_transform.h  +6 −11
mace/ops/activation.h  +4 −4
mace/ops/activation_test.cc  +9 −4
mace/ops/batch_norm.h  +1 −1
mace/ops/conv_2d.h  +0 −1
mace/ops/conv_2d_test.cc  +2 −2
mace/ops/depthwise_conv2d.h  +1 −2
mace/ops/folded_batch_norm.h  +1 −2
mace/ops/fully_connected.h  +1 −2
mace/ops/fused_conv_2d.h  +1 −2
mace/ops/winograd_inverse_transform.h  +1 −2
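Background on the fix, for readers scanning the diff: PReLU's slope (alpha) is a per-channel parameter, so it cannot ride along the fused-activation path as a single compile-time scalar the way the ReLU cap does; this commit un-fuses PReLU and routes alpha in as a tensor. A minimal standalone sketch of the difference in semantics (plain C++ with a hypothetical helper, not MACE code):

#include <cstdio>
#include <vector>

// Per-channel PReLU over an NHWC buffer: each channel c has its own slope
// alpha[c]. A single fused scalar alpha is only correct when every channel
// happens to share one slope, which is not true in general.
void PreluPerChannel(const std::vector<float> &in, int channels,
                     const std::vector<float> &alpha, std::vector<float> *out) {
  for (size_t i = 0; i < in.size(); ++i) {
    const float a = alpha[i % channels];  // NHWC: channel is the innermost dim
    (*out)[i] = in[i] < 0 ? in[i] * a : in[i];
  }
}

int main() {
  std::vector<float> in = {-1.0f, -1.0f, 2.0f, -4.0f};  // 2 pixels x 2 channels
  std::vector<float> alpha = {2.0f, 3.0f};              // one slope per channel
  std::vector<float> out(in.size());
  PreluPerChannel(in, 2, alpha, &out);
  for (float v : out) std::printf("%g ", v);  // prints: -2 -3 2 -12
  std::printf("\n");
  return 0;
}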
mace/kernels/activation.h

@@ -46,8 +46,7 @@ void DoActivation(const T *input_ptr,
                   T *output_ptr,
                   const index_t size,
                   const ActivationType type,
-                  const float relux_max_limit,
-                  const float prelu_alpha) {
+                  const float relux_max_limit) {
   MACE_CHECK(DataTypeToEnum<T>::value != DataType::DT_HALF);
   switch (type) {
@@ -66,17 +65,6 @@ void DoActivation(const T *input_ptr,
                                  static_cast<T>(relux_max_limit));
       }
       break;
-    case PRELU:
-#pragma omp parallel for
-      for (index_t i = 0; i < size; ++i) {
-        T in = input_ptr[i];
-        if (in < 0) {
-          output_ptr[i] = in * prelu_alpha;
-        } else {
-          output_ptr[i] = in;
-        }
-      }
-      break;
     case TANH:
 #pragma omp parallel for
       for (index_t i = 0; i < size; ++i) {
@@ -95,45 +83,70 @@ void DoActivation(const T *input_ptr,
   }
 }

+template <typename T>
+void PReLUActivation(const T *input_ptr,
+                     const index_t size,
+                     const index_t input_chan,
+                     const T *alpha_ptr,
+                     T *output_ptr) {
+#pragma omp parallel for
+  for (index_t i = 0; i < size; ++i) {
+    const index_t chan_idx = i % input_chan;
+    T in = input_ptr[i];
+    if (in < 0) {
+      output_ptr[i] = in * alpha_ptr[chan_idx];
+    } else {
+      output_ptr[i] = in;
+    }
+  }
+}
+
 template <DeviceType D, typename T>
 class ActivationFunctor {
  public:
-  ActivationFunctor(ActivationType type, T relux_max_limit, T prelu_alpha)
-      : activation_(type),
-        relux_max_limit_(relux_max_limit),
-        prelu_alpha_(prelu_alpha) {}
+  ActivationFunctor(ActivationType type, T relux_max_limit)
+      : activation_(type),
+        relux_max_limit_(relux_max_limit) {}

-  void operator()(const Tensor *input, Tensor *output, StatsFuture *future) {
+  void operator()(const Tensor *input,
+                  const Tensor *alpha,
+                  Tensor *output,
+                  StatsFuture *future) {
     const T *input_ptr = input->data<T>();
     T *output_ptr = output->mutable_data<T>();
-    DoActivation(input_ptr, output_ptr, output->size(), activation_,
-                 relux_max_limit_, prelu_alpha_);
+    if (activation_ == PRELU) {
+      const T *alpha_ptr = alpha == nullptr ? nullptr : alpha->data<T>();
+      PReLUActivation(input_ptr, output->size(), input->dim(3), alpha_ptr,
+                      output_ptr);
+    } else {
+      DoActivation(input_ptr, output_ptr, output->size(), activation_,
+                   relux_max_limit_);
+    }
   }

  private:
   ActivationType activation_;
   T relux_max_limit_;
-  T prelu_alpha_;
 };

 template <>
 void ActivationFunctor<DeviceType::NEON, float>::operator()(
-    const Tensor *input, Tensor *output, StatsFuture *future);
+    const Tensor *input,
+    const Tensor *alpha,
+    Tensor *output,
+    StatsFuture *future);

 template <typename T>
 class ActivationFunctor<DeviceType::OPENCL, T> {
  public:
-  ActivationFunctor(ActivationType type, T relux_max_limit, T prelu_alpha)
-      : activation_(type),
-        relux_max_limit_(relux_max_limit),
-        prelu_alpha_(prelu_alpha) {}
+  ActivationFunctor(ActivationType type, T relux_max_limit)
+      : activation_(type),
+        relux_max_limit_(relux_max_limit) {}

-  void operator()(const Tensor *input, Tensor *output, StatsFuture *future);
+  void operator()(const Tensor *input,
+                  const Tensor *alpha,
+                  Tensor *output,
+                  StatsFuture *future);

  private:
   ActivationType activation_;
   T relux_max_limit_;
-  T prelu_alpha_;
   cl::Kernel kernel_;
   std::string tuning_key_prefix_;
 };
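A note on the indexing in the new PReLUActivation: the caller passes input->dim(3), so the layout is assumed NHWC with channels innermost. A flat index then decomposes as i = ((b*H + h)*W + w)*C + c, hence i % C == c. A quick self-check of that identity (a sketch, not MACE code):

#include <cassert>

int main() {
  const int B = 2, H = 3, W = 4, C = 5;
  for (int b = 0; b < B; ++b)
    for (int h = 0; h < H; ++h)
      for (int w = 0; w < W; ++w)
        for (int c = 0; c < C; ++c) {
          // NHWC flat index with channels innermost.
          const long i = ((static_cast<long>(b) * H + h) * W + w) * C + c;
          assert(i % C == c);  // channel is recoverable from the flat index
        }
  return 0;
}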
mace/kernels/batch_norm.h

@@ -21,27 +21,23 @@ namespace kernels {
 struct BatchNormFunctorBase {
   BatchNormFunctorBase(bool folded_constant,
                        const ActivationType activation,
-                       const float relux_max_limit,
-                       const float prelu_alpha)
+                       const float relux_max_limit)
       : folded_constant_(folded_constant),
         activation_(activation),
-        relux_max_limit_(relux_max_limit),
-        prelu_alpha_(prelu_alpha) {}
+        relux_max_limit_(relux_max_limit) {}

   const bool folded_constant_;
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float prelu_alpha_;
 };

 template <DeviceType D, typename T>
 struct BatchNormFunctor : BatchNormFunctorBase {
   BatchNormFunctor(const bool folded_constant,
                    const ActivationType activation,
-                   const float relux_max_limit,
-                   const float prelu_alpha)
-      : BatchNormFunctorBase(
-            folded_constant, activation, relux_max_limit, prelu_alpha) {}
+                   const float relux_max_limit)
+      : BatchNormFunctorBase(folded_constant, activation, relux_max_limit) {}

   void operator()(const Tensor *input,
                   const Tensor *scale,
@@ -132,7 +128,7 @@ struct BatchNormFunctor : BatchNormFunctorBase {
       }
     }

     DoActivation(output_ptr, output_ptr, output->NumElements(), activation_,
-                 relux_max_limit_, prelu_alpha_);
+                 relux_max_limit_);
   }
 };
@@ -150,10 +146,9 @@ template <typename T>
 struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
   BatchNormFunctor(const bool folded_constant,
                    const ActivationType activation,
-                   const float relux_max_limit,
-                   const float prelu_alpha)
-      : BatchNormFunctorBase(
-            folded_constant, activation, relux_max_limit, prelu_alpha) {}
+                   const float relux_max_limit)
+      : BatchNormFunctorBase(folded_constant, activation, relux_max_limit) {}
   void operator()(const Tensor *input,
                   const Tensor *scale,
                   const Tensor *offset,
mace/kernels/conv_2d.h

@@ -182,15 +182,13 @@ struct Conv2dFunctorBase {
                     const std::vector<int> &paddings,
                     const int *dilations,
                     const ActivationType activation,
-                    const float relux_max_limit,
-                    const float prelu_alpha)
+                    const float relux_max_limit)
       : strides_(strides),
         padding_type_(padding_type),
         paddings_(paddings),
         dilations_(dilations),
         activation_(activation),
-        relux_max_limit_(relux_max_limit),
-        prelu_alpha_(prelu_alpha) {}
+        relux_max_limit_(relux_max_limit) {}

   const int *strides_;  // [stride_h, stride_w]
   const Padding padding_type_;
@@ -198,7 +196,6 @@ struct Conv2dFunctorBase {
   const int *dilations_;  // [dilation_h, dilation_w]
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float prelu_alpha_;
 };

 template <DeviceType D, typename T>
@@ -208,15 +205,13 @@ struct Conv2dFunctor : Conv2dFunctorBase {
                 const std::vector<int> &paddings,
                 const int *dilations,
                 const ActivationType activation,
-                const float relux_max_limit,
-                const float prelu_alpha)
+                const float relux_max_limit)
       : Conv2dFunctorBase(strides,
                           padding_type,
                           paddings,
                           dilations,
                           activation,
-                          relux_max_limit,
-                          prelu_alpha) {}
+                          relux_max_limit) {}

   void operator()(const Tensor *input,   // NHWC
                   const Tensor *filter,  // HWOI
@@ -622,7 +617,7 @@ struct Conv2dFunctor : Conv2dFunctorBase {
       }
     }

     DoActivation(output_data, output_data, output->NumElements(), activation_,
-                 relux_max_limit_, prelu_alpha_);
+                 relux_max_limit_);
   }
 };
@@ -640,15 +635,13 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
                 const std::vector<int> &paddings,
                 const int *dilations,
                 const ActivationType activation,
-                const float relux_max_limit,
-                const float prelu_alpha)
+                const float relux_max_limit)
       : Conv2dFunctorBase(strides,
                           padding_type,
                           paddings,
                           dilations,
                           activation,
-                          relux_max_limit,
-                          prelu_alpha) {}
+                          relux_max_limit) {}

   void operator()(const Tensor *input,
                   const Tensor *filter,
mace/kernels/depthwise_conv2d.h

@@ -241,15 +241,13 @@ struct DepthwiseConv2dFunctorBase {
                              const std::vector<int> &paddings,
                              const int *dilations,
                              const ActivationType activation,
-                             const float relux_max_limit,
-                             const float prelu_alpha)
+                             const float relux_max_limit)
       : strides_(strides),
         padding_type_(padding_type),
         paddings_(paddings),
         dilations_(dilations),
         activation_(activation),
-        relux_max_limit_(relux_max_limit),
-        prelu_alpha_(prelu_alpha) {}
+        relux_max_limit_(relux_max_limit) {}

   const int *strides_;  // [stride_h, stride_w]
   const Padding padding_type_;
@@ -257,7 +255,6 @@ struct DepthwiseConv2dFunctorBase {
   const int *dilations_;  // [dilation_h, dilation_w]
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float prelu_alpha_;
 };

 template <DeviceType D, typename T>
@@ -267,15 +264,13 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
                          const std::vector<int> &paddings,
                          const int *dilations,
                          const ActivationType activation,
-                         const float relux_max_limit,
-                         const float prelu_alpha)
+                         const float relux_max_limit)
       : DepthwiseConv2dFunctorBase(strides,
                                    padding_type,
                                    paddings,
                                    dilations,
                                    activation,
-                                   relux_max_limit,
-                                   prelu_alpha) {}
+                                   relux_max_limit) {}

   void operator()(const Tensor *input,   // NHWC
                   const Tensor *filter,  // HWIM
@@ -408,7 +403,7 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
     output_ptr = output->mutable_data<T>();

     DoActivation(output_ptr, output_ptr, output->NumElements(), activation_,
-                 relux_max_limit_, prelu_alpha_);
+                 relux_max_limit_);
   }
 };
@@ -428,15 +423,13 @@ struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
                          const std::vector<int> &paddings,
                          const int *dilations,
                          const ActivationType activation,
-                         const float relux_max_limit,
-                         const float prelu_alpha)
+                         const float relux_max_limit)
       : DepthwiseConv2dFunctorBase(strides,
                                    padding_type,
                                    paddings,
                                    dilations,
                                    activation,
-                                   relux_max_limit,
-                                   prelu_alpha) {}
+                                   relux_max_limit) {}

   void operator()(const Tensor *input,
                   const Tensor *filter,
mace/kernels/fully_connected.h

@@ -15,23 +15,19 @@ namespace kernels {
 struct FullyConnectedBase {
   FullyConnectedBase(const ActivationType activation,
-                     const float relux_max_limit,
-                     const float prelu_alpha)
+                     const float relux_max_limit)
       : activation_(activation),
-        relux_max_limit_(relux_max_limit),
-        prelu_alpha_(prelu_alpha) {}
+        relux_max_limit_(relux_max_limit) {}

   const ActivationType activation_;
   const float relux_max_limit_;
-  const float prelu_alpha_;
 };

 template <DeviceType D, typename T>
 struct FullyConnectedFunctor : FullyConnectedBase {
   FullyConnectedFunctor(const ActivationType activation,
-                        const float relux_max_limit,
-                        const float prelu_alpha)
-      : FullyConnectedBase(activation, relux_max_limit, prelu_alpha) {}
+                        const float relux_max_limit)
+      : FullyConnectedBase(activation, relux_max_limit) {}

   void operator()(const Tensor *input,
                   const Tensor *weight,
@@ -70,16 +66,15 @@ struct FullyConnectedFunctor : FullyConnectedBase {
     }

     DoActivation(output_ptr, output_ptr, output->NumElements(), activation_,
-                 relux_max_limit_, prelu_alpha_);
+                 relux_max_limit_);
   }
 };

 template <typename T>
 struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
   FullyConnectedFunctor(const ActivationType activation,
-                        const float relux_max_limit,
-                        const float prelu_alpha)
-      : FullyConnectedBase(activation, relux_max_limit, prelu_alpha) {}
+                        const float relux_max_limit)
+      : FullyConnectedBase(activation, relux_max_limit) {}

   void operator()(const Tensor *input,
                   const Tensor *weight,
mace/kernels/opencl/activation_opencl.cc

@@ -14,6 +14,7 @@ namespace kernels {
 template <typename T>
 void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
+                                                          const Tensor *alpha,
                                                           Tensor *output,
                                                           StatsFuture *future) {
   const index_t batch = input->dim(0);
@@ -60,8 +61,10 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
         runtime->BuildKernel("activation", kernel_name, built_options);

     int idx = 0;
     kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
+    if (activation_ == PRELU) {
+      kernel_.setArg(idx++,
+                     *(static_cast<const cl::Image2D *>(alpha->buffer())));
+    }
     kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
-    kernel_.setArg(idx++, static_cast<float>(prelu_alpha_));
     kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
   }
mace/kernels/opencl/batch_norm_opencl.cc

@@ -50,9 +50,6 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
       case RELUX:
         built_options.emplace("-DUSE_RELUX");
         break;
-      case PRELU:
-        built_options.emplace("-DUSE_PRELU");
-        break;
       case TANH:
         built_options.emplace("-DUSE_TANH");
         break;
@@ -79,7 +76,6 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     }
     kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
     kernel_.setArg(idx++, relux_max_limit_);
-    kernel_.setArg(idx++, prelu_alpha_);
   }

   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
mace/kernels/opencl/cl/activation.cl

 #include <common.h>

 __kernel void activation(__read_only image2d_t input,
+#ifdef USE_PRELU
+                         __read_only image2d_t alpha,
+#endif
                          __private const float relux_max_limit,
-                         __private const float prelu_alpha,
                          __write_only image2d_t output) {
   const int ch_blk = get_global_id(0);
   const int w = get_global_id(1);
@@ -11,7 +13,12 @@ __kernel void activation(__read_only image2d_t input,
   const int pos = mad24(ch_blk, width, w);
   DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
-  DATA_TYPE4 out = do_activation(in, relux_max_limit, prelu_alpha);
+#ifdef USE_PRELU
+  DATA_TYPE4 prelu_alpha = READ_IMAGET(alpha, SAMPLER, (int2)(ch_blk, 0));
+  DATA_TYPE4 out = do_activation(in, prelu_alpha, relux_max_limit);
+#else
+  DATA_TYPE4 out = do_activation(in, relux_max_limit);
+#endif
   WRITE_IMAGET(output, (int2)(pos, hb), out);
 }
mace/kernels/opencl/cl/batch_norm.cl

@@ -9,8 +9,7 @@ __kernel void batch_norm(__read_only image2d_t input,
                          __private const float epsilon,
 #endif
                          __write_only image2d_t output,
-                         __private const float relux_max_limit,
-                         __private const float prelu_alpha) {
+                         __private const float relux_max_limit) {
   const int ch_blk = get_global_id(0);
   const int w = get_global_id(1);
   const int hb = get_global_id(2);
@@ -35,8 +34,8 @@ __kernel void batch_norm(__read_only image2d_t input,
   DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
   DATA_TYPE4 out = mad(in, bn_scale, bn_offset);

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out = do_activation(out, relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out = do_activation(out, relux_max_limit);
 #endif

   WRITE_IMAGET(output, (int2)(pos, hb), out);
mace/kernels/opencl/cl/common.h

@@ -22,8 +22,10 @@ __constant sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP |
 inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
-                                __private const float relux_max_limit,
-                                __private const float prelu_alpha) {
+#ifdef USE_PRELU
+                                DATA_TYPE4 prelu_alpha,
+#endif
+                                __private const float relux_max_limit) {
   DATA_TYPE4 out;
 #ifdef USE_RELU
   out = fmax(in, 0);
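The do_activation change above gives the function two compile-time shapes: when the kernel is built with -DUSE_PRELU it takes the per-channel alpha vector, otherwise only the scalar cap, and every call site must match the variant it was compiled with. The same pattern in a host-side C++ sketch (the macro stands in for the OpenCL build option; the ReLUX-style clamp in the non-PReLU branch is an assumption for illustration):

// Compile with -DUSE_PRELU to get the PReLU variant, mirroring how the
// OpenCL kernels above are specialized via build options.
#include <algorithm>
#include <cstdio>

#ifdef USE_PRELU
float do_activation(float in, float prelu_alpha, float /*relux_max_limit*/) {
  return in < 0 ? in * prelu_alpha : in;  // per-channel slope applied by caller
}
#else
float do_activation(float in, float relux_max_limit) {
  return std::min(std::max(in, 0.0f), relux_max_limit);  // ReLUX-style clamp
}
#endif

int main() {
#ifdef USE_PRELU
  std::printf("%g\n", do_activation(-2.0f, 0.5f, 6.0f));  // prints -1
#else
  std::printf("%g\n", do_activation(7.0f, 6.0f));         // prints 6
#endif
  return 0;
}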
mace/kernels/opencl/cl/conv_2d.cl

@@ -7,7 +7,6 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
 #endif
                       __write_only image2d_t output,
                       __private const float relux_max_limit,
-                      __private const float prelu_alpha,
                       __private const int in_height,
                       __private const int in_width,
                       __private const int in_ch_blks,
@@ -112,11 +111,11 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
     }
   }

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, prelu_alpha);
-  out1 = do_activation(out1, relux_max_limit, prelu_alpha);
-  out2 = do_activation(out2, relux_max_limit, prelu_alpha);
-  out3 = do_activation(out3, relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit);
+  out1 = do_activation(out1, relux_max_limit);
+  out2 = do_activation(out2, relux_max_limit);
+  out3 = do_activation(out3, relux_max_limit);
 #endif

   const int out_x_base = mul24(out_ch_blk, out_width);
mace/kernels/opencl/cl/conv_2d_1x1.cl

@@ -7,7 +7,6 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
 #endif
                           __write_only image2d_t output,
                           __private const float relux_max_limit,
-                          __private const float prelu_alpha,
                           __private const int in_height,
                           __private const int in_width,
                           __private const int in_ch_blks,
@@ -86,11 +85,11 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
     filter_x_base += 4;
   }

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, prelu_alpha);
-  out1 = do_activation(out1, relux_max_limit, prelu_alpha);
-  out2 = do_activation(out2, relux_max_limit, prelu_alpha);
-  out3 = do_activation(out3, relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit);
+  out1 = do_activation(out1, relux_max_limit);
+  out2 = do_activation(out2, relux_max_limit);
+  out3 = do_activation(out3, relux_max_limit);
 #endif

   const int out_x_base = mul24(out_ch_blk, width);
mace/kernels/opencl/cl/conv_2d_3x3.cl

@@ -7,7 +7,6 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
 #endif
                           __write_only image2d_t output,
                           __private const float relux_max_limit,
-                          __private const float prelu_alpha,
                           __private const int in_height,
                           __private const int in_width,
                           __private const int in_ch_blks,
@@ -120,12 +119,12 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
     }
   }

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, prelu_alpha);
-  out1 = do_activation(out1, relux_max_limit, prelu_alpha);
-  out2 = do_activation(out2, relux_max_limit, prelu_alpha);
-  out3 = do_activation(out3, relux_max_limit, prelu_alpha);
-  out4 = do_activation(out4, relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit);
+  out1 = do_activation(out1, relux_max_limit);
+  out2 = do_activation(out2, relux_max_limit);
+  out3 = do_activation(out3, relux_max_limit);
+  out4 = do_activation(out4, relux_max_limit);
 #endif

   const int out_x_base = mul24(out_ch_blk, out_width);
mace/kernels/opencl/cl/depthwise_conv2d.cl

@@ -8,7 +8,6 @@ __kernel void depthwise_conv2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
 #endif
                                __write_only image2d_t output,
                                __private const float relux_max_limit,
-                               __private const float prelu_alpha,
                                __private const short in_height,
                                __private const short in_width,
                                __private const short in_ch_blks,
@@ -103,11 +102,11 @@ __kernel void depthwise_conv2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
     in_hb_idx += dilation_h;
   }

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, prelu_alpha);
-  out1 = do_activation(out1, relux_max_limit, prelu_alpha);
-  out2 = do_activation(out2, relux_max_limit, prelu_alpha);
-  out3 = do_activation(out3, relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit);
+  out1 = do_activation(out1, relux_max_limit);
+  out2 = do_activation(out2, relux_max_limit);
+  out3 = do_activation(out3, relux_max_limit);
 #endif

   const short out_x_base = mul24(out_ch_blk, out_width);
@@ -134,7 +133,6 @@ __kernel void depthwise_conv2d_s1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
 #endif
                                   __write_only image2d_t output,
                                   __private const DATA_TYPE relux_max_limit,
-                                  __private const DATA_TYPE prelu_alpha,
                                   __private const short in_height,
                                   __private const short in_width,
                                   __private const short in_ch_blks,
@@ -220,11 +218,11 @@ __kernel void depthwise_conv2d_s1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
     in_hb_idx += 1;
   }

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, prelu_alpha);
-  out1 = do_activation(out1, relux_max_limit, prelu_alpha);
-  out2 = do_activation(out2, relux_max_limit, prelu_alpha);
-  out3 = do_activation(out3, relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit);
+  out1 = do_activation(out1, relux_max_limit);
+  out2 = do_activation(out2, relux_max_limit);
+  out3 = do_activation(out3, relux_max_limit);
 #endif

   const short out_x_base = mul24(out_ch_blk, out_width);
mace/kernels/opencl/cl/fully_connected.cl

@@ -10,8 +10,7 @@ __kernel void fully_connected(__read_only image2d_t input,
                               __private const int input_height,
                               __private const int input_width,
                               __private const int input_channel,
-                              __private const float relux_max_limit,
-                              __private const float prelu_alpha) {
+                              __private const float relux_max_limit) {
   const int batch_idx = get_global_id(0);
   const int out_blk_idx = get_global_id(1);
   const int input_chan_blk = (input_channel + 3) >> 2;
@@ -51,8 +50,8 @@ __kernel void fully_connected(__read_only image2d_t input,
     input_coord.y++;
   }

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  result = do_activation(result, relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  result = do_activation(result, relux_max_limit);
 #endif

   WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
 }
mace/kernels/opencl/cl/winograd_transform.cl

@@ -115,8 +115,7 @@ __kernel void winograd_inverse_transform_2x2(__read_only image2d_t input,
                                              __private const int out_width,
                                              __private const int round_hw,
                                              __private const int round_w,
-                                             __private const float relux_max_limit,
-                                             __private const float prelu_alpha) {
+                                             __private const float relux_max_limit) {
   const int width_idx = get_global_id(0);
   const int height_idx = get_global_id(1);
   const int out_channel = get_global_size(1);
@@ -183,11 +182,11 @@ __kernel void winograd_inverse_transform_2x2(__read_only image2d_t input,
 #endif

-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
-  in0[0] = do_activation(in0[0], relux_max_limit, prelu_alpha);
-  in0[1] = do_activation(in0[1], relux_max_limit, prelu_alpha);
-  in1[0] = do_activation(in1[0], relux_max_limit, prelu_alpha);
-  in1[1] = do_activation(in1[1], relux_max_limit, prelu_alpha);
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  in0[0] = do_activation(in0[0], relux_max_limit);
+  in0[1] = do_activation(in0[1], relux_max_limit);
+  in1[0] = do_activation(in1[0], relux_max_limit);
+  in1[1] = do_activation(in1[1], relux_max_limit);
 #endif

   WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]);
@@ -205,6 +204,4 @@ __kernel void winograd_inverse_transform_2x2(__read_only image2d_t input,
     WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y + 1), in1[1]);
   }
 }
mace/kernels/opencl/conv_2d_opencl.cc

@@ -17,7 +17,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
-                             const float prelu_alpha,
                              const DataType dt,
                              Tensor *output,
                              StatsFuture *future);
@@ -31,7 +30,6 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
-                             const float prelu_alpha,
                              const DataType dt,
                              Tensor *output,
                              StatsFuture *future);
@@ -45,7 +43,6 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
                          const int *dilations,
                          const ActivationType activation,
                          const float relux_max_limit,
-                         const float prelu_alpha,
                          const DataType dt,
                          Tensor *output,
                          StatsFuture *future);
@@ -60,7 +57,7 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
       cl::Kernel *kernel, const Tensor *input, const Tensor *filter,
       const Tensor *bias, const int stride, const int *padding,
       const int *dilations, const ActivationType activation,
-      const float relux_max_limit, const float prelu_alpha, const DataType dt,
+      const float relux_max_limit, const DataType dt,
       Tensor *output, StatsFuture *future);
   // Selection matrix: kernel_size x stride_size
   static const Conv2dOpenclFunction selector[5] =
@@ -99,12 +96,10 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
       selector[kernel_h - 1] != nullptr) {
     auto conv2d_func = selector[kernel_h - 1];
     conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(),
-                dilations_, activation_, relux_max_limit_, prelu_alpha_,
-                DataTypeToEnum<T>::value, output, future);
+                dilations_, activation_, relux_max_limit_,
+                DataTypeToEnum<T>::value, output, future);
   } else {
     Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings.data(),
-                 dilations_, activation_, relux_max_limit_, prelu_alpha_,
-                 DataTypeToEnum<T>::value, output, future);
+                 dilations_, activation_, relux_max_limit_,
+                 DataTypeToEnum<T>::value, output, future);
   }
 }
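For context on the selector array touched above: the OPENCL path picks a specialized conv kernel from a function-pointer table indexed by kernel_h - 1, with nullptr entries falling through to a general implementation. A minimal sketch of that dispatch style (hypothetical signatures, not the MACE ones):

#include <cstdio>

using ConvFn = void (*)(int stride);

void ConvK1x1(int stride) { std::printf("1x1 kernel, stride %d\n", stride); }
void ConvK3x3(int stride) { std::printf("3x3 kernel, stride %d\n", stride); }
void ConvGeneral(int stride) { std::printf("general kernel, stride %d\n", stride); }

// Index kernel_h - 1 selects the specialized kernel; nullptr means
// "no specialization available, use the general implementation".
static const ConvFn kSelector[5] = {ConvK1x1, nullptr, ConvK3x3, nullptr, nullptr};

void RunConv(int kernel_h, int stride) {
  if (kernel_h <= 5 && kSelector[kernel_h - 1] != nullptr) {
    kSelector[kernel_h - 1](stride);
  } else {
    ConvGeneral(stride);
  }
}

int main() {
  RunConv(3, 1);  // hits the 3x3 specialization
  RunConv(5, 2);  // falls through to the general path
  return 0;
}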
mace/kernels/opencl/conv_2d_opencl_1x1.cc

@@ -19,7 +19,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
-                             const float prelu_alpha,
                              const DataType dt,
                              Tensor *output,
                              StatsFuture *future) {
@@ -56,9 +55,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
       case RELUX:
         built_options.emplace("-DUSE_RELUX");
         break;
-      case PRELU:
-        built_options.emplace("-DUSE_PRELU");
-        break;
       case TANH:
         built_options.emplace("-DUSE_TANH");
         break;
@@ -86,7 +82,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
                    *(static_cast<const cl::Image2D *>(output->buffer())));
     // FIXME handle flexable data type: half not supported
     kernel->setArg(idx++, relux_max_limit);
-    kernel->setArg(idx++, prelu_alpha);
     kernel->setArg(idx++, static_cast<int>(input_height));
     kernel->setArg(idx++, static_cast<int>(input_width));
     kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
mace/kernels/opencl/conv_2d_opencl_3x3.cc

@@ -21,7 +21,6 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
-                             const float prelu_alpha,
                              const DataType dt,
                              Tensor *output,
                              StatsFuture *future) {
@@ -51,9 +50,6 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
       case RELUX:
         built_options.emplace("-DUSE_RELUX");
         break;
-      case PRELU:
-        built_options.emplace("-DUSE_PRELU");
-        break;
       case TANH:
         built_options.emplace("-DUSE_TANH");
         break;
@@ -80,7 +76,6 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
     kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(output->buffer())));
     kernel->setArg(idx++, relux_max_limit);
-    kernel->setArg(idx++, prelu_alpha);
     kernel->setArg(idx++, static_cast<int>(input->dim(1)));
     kernel->setArg(idx++, static_cast<int>(input->dim(2)));
     kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
mace/kernels/opencl/conv_2d_opencl_general.cc

@@ -21,7 +21,6 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
                          const int *dilations,
                          const ActivationType activation,
                          const float relux_max_limit,
-                         const float prelu_alpha,
                          const DataType dt,
                          Tensor *output,
                          StatsFuture *future) {
@@ -51,9 +50,6 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
       case RELUX:
         built_options.emplace("-DUSE_RELUX");
         break;
-      case PRELU:
-        built_options.emplace("-DUSE_PRELU");
-        break;
       case TANH:
         built_options.emplace("-DUSE_TANH");
         break;
@@ -80,7 +76,6 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
     kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(output->buffer())));
     kernel->setArg(idx++, relux_max_limit);
-    kernel->setArg(idx++, prelu_alpha);
     kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1)));
     kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2)));
     kernel->setArg(idx++, static_cast<uint32_t>(input_channel_blocks));
mace/kernels/opencl/depthwise_conv_opencl.cc

@@ -20,7 +20,6 @@ void DepthwiseConv2d(cl::Kernel *kernel,
                      const int *dilations,
                      const ActivationType activation,
                      const float relux_max_limit,
-                     const float prelu_alpha,
                      const DataType dt,
                      Tensor *output,
                      StatsFuture *future) {
@@ -69,9 +68,6 @@ void DepthwiseConv2d(cl::Kernel *kernel,
       case RELUX:
         built_options.emplace("-DUSE_RELUX");
         break;
-      case PRELU:
-        built_options.emplace("-DUSE_PRELU");
-        break;
       case TANH:
         built_options.emplace("-DUSE_TANH");
         break;
@@ -96,7 +92,6 @@ void DepthwiseConv2d(cl::Kernel *kernel,
     kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(output->buffer())));
     kernel->setArg(idx++, relux_max_limit);
-    kernel->setArg(idx++, prelu_alpha);
     kernel->setArg(idx++, static_cast<short>(input_height));
     kernel->setArg(idx++, static_cast<short>(input_width));
     kernel->setArg(idx++, static_cast<short>(input_channel_blocks));
@@ -140,8 +135,8 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
         << " is not implemented yet, using slow version";
     // TODO(heliangliang) The CPU/NEON kernel should map the buffer
     DepthwiseConv2dFunctor<DeviceType::CPU, float>(
-        strides_, padding_type_, paddings_, dilations_, activation_,
-        relux_max_limit_, prelu_alpha_)(input, filter, bias, output, future);
+        strides_, padding_type_, paddings_, dilations_, activation_,
+        relux_max_limit_)(input, filter, bias, output, future);
     return;
   }
@@ -169,7 +164,7 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
   output->ResizeImage(output_shape, output_image_shape);

   DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(),
-                  dilations_, activation_, relux_max_limit_, prelu_alpha_,
+                  dilations_, activation_, relux_max_limit_,
                   DataTypeToEnum<T>::value, output, future);
 }
mace/kernels/opencl/fully_connected_opencl.cc

@@ -48,9 +48,6 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
       case RELUX:
         built_options.emplace("-DUSE_RELUX");
         break;
-      case PRELU:
-        built_options.emplace("-DUSE_PRELU");
-        break;
       case TANH:
         built_options.emplace("-DUSE_TANH");
         break;
@@ -78,7 +75,6 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
     kernel_.setArg(idx++, static_cast<int>(input->dim(3)));
     // FIXME handle flexable data type: half not supported
     kernel_.setArg(idx++, relux_max_limit_);
-    kernel_.setArg(idx++, prelu_alpha_);
   }

   const uint32_t gws[2] = {
mace/kernels/opencl/winograd_transform.cc

@@ -129,7 +129,6 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor
     kernel_.setArg(idx++, static_cast<uint32_t>(round_h * round_w));
     kernel_.setArg(idx++, static_cast<uint32_t>(round_w));
     kernel_.setArg(idx++, relux_max_limit_);
-    kernel_.setArg(idx++, prelu_alpha_);
   }

   const uint32_t gws[2] = {static_cast<uint32_t>(input_tensor->dim(2)),
mace/kernels/winograd_transform.h

@@ -58,21 +58,18 @@ struct WinogradInverseTransformFunctorBase {
                                       const int height,
                                       const int width,
                                       const ActivationType activation,
-                                      const float relux_max_limit,
-                                      const float prelu_alpha)
+                                      const float relux_max_limit)
       : batch_(batch),
         height_(height),
         width_(width),
         activation_(activation),
-        relux_max_limit_(relux_max_limit),
-        prelu_alpha_(prelu_alpha) {}
+        relux_max_limit_(relux_max_limit) {}

   const int batch_;
   const int height_;
   const int width_;
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float prelu_alpha_;
 };

 template <DeviceType D, typename T>
@@ -81,9 +78,8 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
                                   const int height,
                                   const int width,
                                   const ActivationType activation,
-                                  const float relux_max_limit,
-                                  const float prelu_alpha)
-      : WinogradInverseTransformFunctorBase(
-            batch, height, width, activation, relux_max_limit, prelu_alpha) {}
+                                  const float relux_max_limit)
+      : WinogradInverseTransformFunctorBase(
+            batch, height, width, activation, relux_max_limit) {}

   void operator()(const Tensor *input,
                   const Tensor *bias,
@@ -100,9 +96,8 @@ struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T> : WinogradInverseTransformFunctorBase {
                                   const int height,
                                   const int width,
                                   const ActivationType activation,
-                                  const float relux_max_limit,
-                                  const float prelu_alpha)
-      : WinogradInverseTransformFunctorBase(
-            batch, height, width, activation, relux_max_limit, prelu_alpha) {}
+                                  const float relux_max_limit)
+      : WinogradInverseTransformFunctorBase(
+            batch, height, width, activation, relux_max_limit) {}

   void operator()(const Tensor *input,
                   const Tensor *bias,
mace/ops/activation.h

@@ -18,15 +18,15 @@ class ActivationOp : public Operator<D, T> {
         functor_(kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
                                                                   "NOOP")),
-                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
-                 OperatorBase::GetSingleArgument<float>("alpha", 0.0f)) {}
+                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f)) {}

   bool Run(StatsFuture *future) override {
-    const Tensor *input_tensor = this->inputs_[0];
+    const Tensor *input_tensor = this->Input(0);
+    const Tensor *alpha_tensor =
+        this->InputSize() >= 2 ? this->Input(1) : nullptr;
     Tensor *output_tensor = this->outputs_[0];
     output_tensor->ResizeLike(input_tensor);

-    functor_(input_tensor, output_tensor, future);
+    functor_(input_tensor, alpha_tensor, output_tensor, future);
     return true;
   }
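At the op level, alpha is now an optional second input tensor (a weight that travels with the model) instead of a float attribute; single-input graphs still run because the functor simply receives nullptr. The guard is the usual optional-input idiom, sketched here with hypothetical types:

#include <vector>

struct Tensor { std::vector<float> data; };

struct ActivationOpSketch {
  std::vector<const Tensor *> inputs;

  // Second input is optional: present for PRELU (the per-channel slopes),
  // absent for every other activation.
  const Tensor *AlphaOrNull() const {
    return inputs.size() >= 2 ? inputs[1] : nullptr;
  }
};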
mace/ops/activation_test.cc

@@ -213,17 +213,22 @@ void TestSimplePrelu() {
   // Add input data
   net.AddInputFromArray<D, float>(
       "Input", {2, 2, 2, 2},
-      {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
+      {-7, 7, -6, 6, -5, -5, -4, -4, -3, 3, -2, 2, -1, -1, 0, 0});
+  net.AddInputFromArray<D, float>("Alpha", {2}, {2.0, 3.0});

   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
                             kernels::BufferType::IN_OUT_CHANNEL);
+    BufferToImage<D, float>(net, "Alpha", "AlphaImage",
+                            kernels::BufferType::ARGUMENT);

     OpDefBuilder("Activation", "PreluTest")
         .Input("InputImage")
+        .Input("AlphaImage")
         .Output("OutputImage")
         .AddStringArg("activation", "PRELU")
-        .AddFloatArg("alpha", 2.0)
         .Finalize(net.NewOperatorDef());

     // Run
@@ -235,9 +240,9 @@ void TestSimplePrelu() {
   } else {
     OpDefBuilder("Activation", "PreluTest")
         .Input("Input")
+        .Input("Alpha")
         .Output("Output")
         .AddStringArg("activation", "PRELU")
-        .AddFloatArg("alpha", 2.0)
         .Finalize(net.NewOperatorDef());

     // Run
@@ -245,7 +250,7 @@ void TestSimplePrelu() {
   }

   auto expected = CreateTensor<float>(
-      {2, 2, 2, 2}, {-14, 7, -12, 6, -10, 5, -8, 4, -6, 3, -4, 2, -2, 1, 0, 0});
+      {2, 2, 2, 2}, {-14, 7, -12, 6, -10, -15, -8, -12, -6, 3, -4, 2, -2, -3, 0, 0});

   ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
 }
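The updated expected tensor follows directly from the per-channel slopes {2.0, 3.0}: with two channels in NHWC order, even flat indices use slope 2 and odd ones slope 3, e.g. -5 * 3 = -15 and -1 * 3 = -3. A standalone check of every element (a sketch, not the MACE test harness; all values are exactly representable in float, so equality comparison is safe):

#include <cassert>
#include <vector>

int main() {
  const std::vector<float> input = {-7, 7, -6, 6, -5, -5, -4, -4,
                                    -3, 3, -2, 2, -1, -1, 0, 0};
  const std::vector<float> alpha = {2.0f, 3.0f};  // one slope per channel
  const std::vector<float> expected = {-14, 7, -12, 6, -10, -15, -8, -12,
                                       -6, 3, -4, 2, -2, -3, 0, 0};
  for (size_t i = 0; i < input.size(); ++i) {
    const float a = alpha[i % 2];  // NHWC shape {2, 2, 2, 2}: 2 channels
    const float got = input[i] < 0 ? input[i] * a : input[i];
    assert(got == expected[i]);
  }
  return 0;
}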
mace/ops/batch_norm.h

@@ -16,7 +16,7 @@ class BatchNormOp : public Operator<D, T> {
  public:
   BatchNormOp(const OperatorDef &operator_def, Workspace *ws)
       : Operator<D, T>(operator_def, ws),
-        functor_(false, kernels::ActivationType::NOOP, 0.0f, 0.0f) {
+        functor_(false, kernels::ActivationType::NOOP, 0.0f) {
     epsilon_ = OperatorBase::GetSingleArgument<float>("epsilon",
                                                       static_cast<float>(1e-4));
   }
mace/ops/conv_2d.h

@@ -23,7 +23,6 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
                  this->paddings_,
                  this->dilations_.data(),
                  kernels::ActivationType::NOOP,
-                 0.0f,
                  0.0f) {}

   bool Run(StatsFuture *future) override {
mace/ops/conv_2d_test.cc

@@ -412,9 +412,9 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape, const int stride) {
 }

 TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) {
-  TestComplexConvNxNS12<DeviceType::OPENCL, float>({32, 16, 16, 32},
-                                                   1);
-  TestComplexConvNxNS12<DeviceType::OPENCL, float>({32, 16, 16, 32},
-                                                   2);
+  TestComplexConvNxNS12<DeviceType::OPENCL, float>({32, 16, 16, 32}, 1);
+  TestComplexConvNxNS12<DeviceType::OPENCL, float>({32, 16, 16, 32}, 2);
 }
mace/ops/depthwise_conv2d.h

@@ -26,8 +26,7 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase<D, T> {
                  kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
                                                                   "NOOP")),
-                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
-                 OperatorBase::GetSingleArgument<float>("alpha", 0.0f)) {}
+                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f)) {}

   bool Run(StatsFuture *future) override {
     const Tensor *input = this->Input(INPUT);
mace/ops/folded_batch_norm.h

@@ -19,8 +19,7 @@ class FoldedBatchNormOp : public Operator<D, T> {
                  kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
                                                                   "NOOP")),
-                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
-                 OperatorBase::GetSingleArgument<float>("alpha", 0.0f)) {}
+                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f)) {}

   bool Run(StatsFuture *future) override {
     const Tensor *input = this->Input(INPUT);
mace/ops/fully_connected.h

@@ -19,8 +19,7 @@ class FullyConnectedOp : public Operator<D, T> {
                  kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
                                                                   "NOOP")),
-                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
-                 OperatorBase::GetSingleArgument<float>("alpha", 0.0f)) {}
+                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f)) {}

   bool Run(StatsFuture *future) override {
     const Tensor *input = this->Input(INPUT);
mace/ops/fused_conv_2d.h

@@ -25,8 +25,7 @@ class FusedConv2dOp : public ConvPool2dOpBase<D, T> {
                  kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
                                                                   "NOOP")),
-                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
-                 OperatorBase::GetSingleArgument<float>("alpha", 0.0f)) {}
+                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f)) {}

   bool Run(StatsFuture *future) override {
     const Tensor *input = this->Input(INPUT);
mace/ops/winograd_inverse_transform.h

@@ -24,8 +24,7 @@ class WinogradInverseTransformOp : public Operator<D, T> {
                  kernels::StringToActivationType(
                      OperatorBase::GetSingleArgument<std::string>("activation",
                                                                   "NOOP")),
-                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
-                 OperatorBase::GetSingleArgument<float>("alpha", 0.0f)) {}
+                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f)) {}

   bool Run(StatsFuture *future) override {
     const Tensor *input_tensor = this->Input(INPUT);