Commit 10ba3061
Authored Dec 19, 2018 by yejianwu

support leaky relu

Parent 8b9021f7
Showing 56 changed files with 366 additions and 145 deletions (+366 -145)
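Before the per-file diffs, a quick orientation: the commit threads a new optional "leakyrelu_coefficient" argument through the CPU and OpenCL activation paths. A minimal C++ sketch of the element-wise function being wired in (illustrative only, not MACE's exact code; the max/min form matches the CPU hunk in mace/ops/activation.h below):

#include <algorithm>
#include <cstddef>

// Leaky ReLU: f(x) = x for x >= 0, coefficient * x for x < 0,
// written in the max/min form the commit uses on the CPU path.
void LeakyRelu(const float *input, float *output, std::size_t size,
               float leakyrelu_coefficient) {
  for (std::size_t i = 0; i < size; ++i) {
    output[i] = std::max(input[i], 0.0f) +
                leakyrelu_coefficient * std::min(input[i], 0.0f);
  }
}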
mace/ops/activation.cc                               +10  -4
mace/ops/activation.h                                +6   -4
mace/ops/activation_test.cc                          +36  -0
mace/ops/batch_norm.cc                               +9   -4
mace/ops/batch_norm_test.cc                          +6   -2
mace/ops/conv_2d.cc                                  +12  -3
mace/ops/conv_2d_test.cc                             +7   -2
mace/ops/deconv_2d.cc                                +4   -2
mace/ops/deconv_2d.h                                 +4   -1
mace/ops/deconv_2d_test.cc                           +7   -2
mace/ops/depthwise_conv2d.cc                         +6   -3
mace/ops/depthwise_conv2d_test.cc                    +6   -6
mace/ops/depthwise_deconv2d.cc                       +3   -1
mace/ops/depthwise_deconv2d_test.cc                  +7   -2
mace/ops/fully_connected.cc                          +7   -3
mace/ops/fully_connected_test.cc                     +8   -3
mace/ops/opencl/buffer/conv_2d.h                     +6   -2
mace/ops/opencl/buffer/conv_2d_1x1.cc                +5   -0
mace/ops/opencl/buffer/conv_2d_general.cc            +5   -0
mace/ops/opencl/buffer/depthwise_conv2d.cc           +5   -0
mace/ops/opencl/buffer/depthwise_conv2d.h            +4   -1
mace/ops/opencl/cl/activation.cl                     +3   -2
mace/ops/opencl/cl/batch_norm.cl                     +4   -3
mace/ops/opencl/cl/common.h                          +3   -2
mace/ops/opencl/cl/conv_2d.cl                        +6   -5
mace/ops/opencl/cl/conv_2d_1x1.cl                    +6   -5
mace/ops/opencl/cl/conv_2d_1x1_buffer.cl             +4   -3
mace/ops/opencl/cl/conv_2d_3x3.cl                    +7   -6
mace/ops/opencl/cl/conv_2d_buffer.cl                 +6   -5
mace/ops/opencl/cl/deconv_2d.cl                      +7   -6
mace/ops/opencl/cl/depthwise_conv2d.cl               +12  -10
mace/ops/opencl/cl/depthwise_conv2d_buffer.cl        +6   -5
mace/ops/opencl/cl/depthwise_deconv2d.cl             +8   -7
mace/ops/opencl/cl/fully_connected.cl                +8   -6
mace/ops/opencl/cl/winograd_transform.cl             +27  -25
mace/ops/opencl/conv_2d.h                            +1   -0
mace/ops/opencl/deconv_2d.h                          +1   -0
mace/ops/opencl/depthwise_conv2d.h                   +1   -0
mace/ops/opencl/depthwise_deconv2d.h                 +1   -0
mace/ops/opencl/fully_connected.h                    +1   -0
mace/ops/opencl/image/activation.h                   +6   -2
mace/ops/opencl/image/batch_norm.h                   +11  -3
mace/ops/opencl/image/conv_2d.h                      +10  -0
mace/ops/opencl/image/conv_2d_1x1.cc                 +5   -0
mace/ops/opencl/image/conv_2d_3x3.cc                 +5   -0
mace/ops/opencl/image/conv_2d_general.cc             +5   -0
mace/ops/opencl/image/deconv_2d.h                    +6   -0
mace/ops/opencl/image/depthwise_conv2d.cc            +5   -0
mace/ops/opencl/image/depthwise_conv2d.h             +5   -2
mace/ops/opencl/image/depthwise_deconv2d.h           +6   -0
mace/ops/opencl/image/fully_connected.h              +6   -0
mace/ops/opencl/image/winograd_conv2d.cc             +8   -1
mace/python/tools/converter_tool/base_converter.py   +1   -0
mace/python/tools/converter_tool/caffe_converter.py  +8   -0
mace/python/tools/converter_tool/onnx_converter.py   +1   -1
mace/python/tools/converter_tool/transformer.py      +3   -1
mace/ops/activation.cc

@@ -36,9 +36,11 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
       : Operation(context),
         activation_(ops::StringToActivationType(
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
-        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)) {}
 
   MaceStatus Run(OpContext *context) override {
     MACE_UNUSED(context);
@@ -58,7 +60,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
                    alpha_ptr, output_ptr);
     } else {
       DoActivation(input_ptr, output_ptr, output->size(), activation_,
-                   relux_max_limit_);
+                   relux_max_limit_, leakyrelu_coefficient_);
     }
     return MaceStatus::MACE_SUCCESS;
   }
@@ -66,6 +68,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
  private:
   ActivationType activation_;
   float relux_max_limit_;
+  float leakyrelu_coefficient_;
 };
@@ -80,11 +83,14 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
                                                    "NOOP"));
     auto relux_max_limit = static_cast<T>(
         Operation::GetOptionalArg<float>("max_limit", 0.0f));
+    auto leakyrelu_coefficient = static_cast<T>(
+        Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f));
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
       kernel_.reset(
-          new opencl::image::ActivationKernel<T>(type, relux_max_limit));
+          new opencl::image::ActivationKernel<T>(
+              type, relux_max_limit, leakyrelu_coefficient));
     } else {
       MACE_NOT_IMPLEMENTED;
     }
mace/ops/activation.h

@@ -62,7 +62,8 @@ void DoActivation(const T *input_ptr,
                   T *output_ptr,
                   const index_t size,
                   const ActivationType type,
-                  const float relux_max_limit) {
+                  const float relux_max_limit,
+                  const float leakyrelu_coefficient) {
   MACE_CHECK(DataTypeToEnum<T>::value != DataType::DT_HALF);
 
   switch (type) {
@@ -97,7 +98,7 @@ void DoActivation(const T *input_ptr,
 #pragma omp parallel for schedule(runtime)
       for (index_t i = 0; i < size; ++i) {
         output_ptr[i] = std::max(input_ptr[i], static_cast<T>(0))
-            + std::min(input_ptr[i], static_cast<T>(0)) * relux_max_limit;
+            + leakyrelu_coefficient * std::min(input_ptr[i], static_cast<T>(0));
       }
       break;
     default:
@@ -110,7 +111,8 @@ inline void DoActivation(const float *input_ptr,
                          float *output_ptr,
                          const index_t size,
                          const ActivationType type,
-                         const float relux_max_limit) {
+                         const float relux_max_limit,
+                         const float leakyrelu_coefficient) {
   switch (type) {
     case NOOP:
       break;
@@ -133,7 +135,7 @@ inline void DoActivation(const float *input_ptr,
       }
       break;
     case LEAKYRELU:
-      LeakyReluNeon(input_ptr, relux_max_limit, size, output_ptr);
+      LeakyReluNeon(input_ptr, leakyrelu_coefficient, size, output_ptr);
       break;
     default:
       LOG(FATAL) << "Unknown activation type: " << type;
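Note what these hunks fix: the old CPU code scaled the negative half by relux_max_limit, so RELUX's cap was doing double duty as the leaky slope. The max/min form equals the piecewise definition because for any x exactly one of max(x, 0) and min(x, 0) is nonzero. A small self-check (my example, not from the commit):

#include <algorithm>
#include <cassert>

int main() {
  const float a = 0.1f;  // leakyrelu_coefficient
  for (float x : {-7.0f, -0.5f, 0.0f, 0.5f, 7.0f}) {
    float fused = std::max(x, 0.0f) + a * std::min(x, 0.0f);
    float piecewise = (x >= 0.0f) ? x : a * x;
    assert(fused == piecewise);  // e.g. x = -7 -> -0.7, matching the test below
  }
  return 0;
}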
mace/ops/activation_test.cc

@@ -52,6 +52,42 @@ TEST_F(ActivationOpTest, OPENCLSimpleRelu) {
   TestSimpleRelu<DeviceType::GPU>();
 }
 
+namespace {
+template <DeviceType D>
+void TestSimpleLeakyRelu() {
+  OpsTestNet net;
+
+  // Add input data
+  net.AddInputFromArray<D, float>(
+      "Input", {2, 2, 2, 2},
+      {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
+
+  OpDefBuilder("Activation", "ReluTest")
+      .Input("Input")
+      .Output("Output")
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1)
+      .Finalize(net.NewOperatorDef());
+
+  // Run
+  net.RunOp(D);
+
+  auto expected = net.CreateTensor<float>(
+      {2, 2, 2, 2},
+      {-0.7, 7, -0.6, 6, -0.5, 5, -0.4, 4, -0.3, 3, -0.2, 2, -0.1, 1, 0, 0});
+
+  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
+}
+}  // namespace
+
+TEST_F(ActivationOpTest, CPUSimpleLeakyRelu) {
+  TestSimpleLeakyRelu<DeviceType::CPU>();
+}
+
+TEST_F(ActivationOpTest, OPENCLSimpleLeakyRelu) {
+  TestSimpleLeakyRelu<DeviceType::GPU>();
+}
+
 namespace {
 template <DeviceType D>
 void TestUnalignedSimpleRelu() {
mace/ops/batch_norm.cc

@@ -35,10 +35,12 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
   explicit BatchNormOp(OpConstructContext *context)
       : Operation(context),
         epsilon_(Operation::GetOptionalArg<float>("epsilon",
                                                   static_cast<float>(1e-4))),
         activation_(ops::StringToActivationType(
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
-        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)) {}
 
   MaceStatus Run(OpContext *context) override {
     MACE_UNUSED(context);
@@ -121,7 +123,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
       }
     }
     DoActivation(output_ptr, output_ptr, output->size(), activation_,
-                 relux_max_limit_);
+                 relux_max_limit_, leakyrelu_coefficient_);
     return MaceStatus::MACE_SUCCESS;
   }
@@ -130,6 +132,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
   float epsilon_;
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
 
  protected:
   MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR);
@@ -148,11 +151,13 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
     ActivationType activation = ops::StringToActivationType(
         Operation::GetOptionalArg<std::string>("activation", "NOOP"));
     float relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f);
+    float leakyrelu_coefficient =
+        Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f);
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
       mem_type = MemoryType::GPU_IMAGE;
       kernel_.reset(new opencl::image::BatchNormKernel<T>(
-          epsilon, activation, relux_max_limit));
+          epsilon, activation, relux_max_limit, leakyrelu_coefficient));
     } else {
       MACE_NOT_IMPLEMENTED;
     }
mace/ops/batch_norm_test.cc

@@ -88,8 +88,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
   // Add input data
   net.AddRandomInput<DeviceType::GPU, float>("Input",
                                              {batch, height, width, channels});
-  net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels}, true);
-  net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels}, true);
+  net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels}, true, false);
+  net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels}, true, false);
   net.AddRandomInput<DeviceType::GPU, float>("Mean", {channels}, true);
   net.AddRandomInput<DeviceType::GPU, float>("Var", {channels}, true);
@@ -105,6 +105,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
       .Input("Var")
       .AddFloatArg("epsilon", 1e-3)
       .Output("OutputNCHW")
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1)
       .Finalize(net.NewOperatorDef());
 
   // run cpu
@@ -126,6 +128,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
       .Input("Var")
       .AddFloatArg("epsilon", 1e-3)
       .Output("Output")
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1)
       .Finalize(net.NewOperatorDef());
 
   // Tuning
mace/ops/conv_2d.cc

@@ -58,6 +58,8 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
         relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)),
         is_filter_transformed_(false) {}
 
   MaceStatus Run(OpContext *context) override {
@@ -520,7 +522,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
     }
 
     DoActivation(output_data, output_data, output->size(), activation_,
-                 relux_max_limit_);
+                 relux_max_limit_, leakyrelu_coefficient_);
 
     return MaceStatus::MACE_SUCCESS;
   }
@@ -703,6 +705,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
  private:
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
   bool is_filter_transformed_;
   SGemm sgemm_;
@@ -721,7 +724,9 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
         activation_(ops::StringToActivationType(
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
-        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)) {}
 
   MaceStatus Run(OpContext *context) override {
     const Tensor *input = this->Input(INPUT);
@@ -944,6 +949,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
  private:
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
 
  private:
   MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
@@ -961,6 +967,8 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
         relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)),
         wino_block_size_(Operation::GetOptionalArg<int>("wino_block_size", 0)) {
     MemoryType mem_type;
     if (context->device()->gpu_runtime()->UseImageMemory()) {
@@ -1007,12 +1015,13 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
     return kernel_->Compute(context, input, filter, bias, strides_.data(),
                             padding_type_, paddings_, dilations_.data(),
                             activation_, relux_max_limit_,
-                            wino_block_size_, output);
+                            leakyrelu_coefficient_, wino_block_size_, output);
   }
 
  private:
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
   std::unique_ptr<OpenCLConv2dKernel> kernel_;
   int wino_block_size_;
mace/ops/conv_2d_test.cc

@@ -527,8 +527,9 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
     // Add input data
     net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
     net.AddRandomInput<D, T>(
-        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
-    net.AddRandomInput<D, T>("Bias", {output_channels}, true);
+        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true,
+        false);
+    net.AddRandomInput<D, T>("Bias", {output_channels}, true, false);
 
     net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
                                                     NCHW);
@@ -541,6 +542,8 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
         .AddIntsArg("strides", {stride_h, stride_w})
         .AddIntArg("padding", type)
         .AddIntsArg("dilations", {1, 1})
+        .AddStringArg("activation", "LEAKYRELU")
+        .AddFloatArg("leakyrelu_coefficient", 0.1)
         .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
         .Finalize(net.NewOperatorDef());
@@ -564,6 +567,8 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
         .AddIntsArg("strides", {stride_h, stride_w})
         .AddIntArg("padding", type)
         .AddIntsArg("dilations", {1, 1})
+        .AddStringArg("activation", "LEAKYRELU")
+        .AddFloatArg("leakyrelu_coefficient", 0.1)
         .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
         .AddIntArg("wino_block_size", wino_blk_size)
         .Finalize(net.NewOperatorDef());
mace/ops/deconv_2d.cc

@@ -292,7 +292,8 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
                  output_data,
                  output->size(),
                  activation_,
-                 relux_max_limit_);
+                 relux_max_limit_,
+                 leakyrelu_coefficient_);
 
     return MaceStatus::MACE_SUCCESS;
   }
@@ -443,7 +444,8 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
     return kernel_->Compute(context, input, filter, bias, strides_.data(),
                             in_paddings.data(), activation_, relux_max_limit_,
-                            out_shape, output);
+                            leakyrelu_coefficient_, out_shape, output);
   }
 
  private:
mace/ops/deconv_2d.h

@@ -47,7 +47,9 @@ class Deconv2dOpBase : public Operation {
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
         relux_max_limit_(
-            Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+            Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)) {}
 
   static void CalcDeconvShape_Caffe(
       const index_t *input_shape,  // NHWC
@@ -191,6 +193,7 @@ class Deconv2dOpBase : public Operation {
   const FrameworkType model_type_;
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
 };
 
 template <typename T>
mace/ops/deconv_2d_test.cc

@@ -377,8 +377,9 @@ void TestComplexDeconvNxN(const int batch,
     // Add input data
     net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
     net.AddRandomInput<D, T>(
-        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
-    net.AddRandomInput<D, T>("Bias", {output_channels}, true);
+        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true,
+        false);
+    net.AddRandomInput<D, T>("Bias", {output_channels}, true, false);
     net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
                                                     NCHW);
     int out_h = 0;
@@ -418,6 +419,8 @@ void TestComplexDeconvNxN(const int batch,
           .AddIntsArg("strides", {stride_h, stride_w})
           .AddIntsArg("padding_values", paddings)
           .AddIntArg("framework_type", model_type)
+          .AddStringArg("activation", "LEAKYRELU")
+          .AddFloatArg("leakyrelu_coefficient", 0.1)
           .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
           .Finalize(net.NewOperatorDef());
     } else {
@@ -454,6 +457,8 @@ void TestComplexDeconvNxN(const int batch,
           .AddIntsArg("strides", {stride_h, stride_w})
           .AddIntsArg("padding_values", paddings)
           .AddIntArg("framework_type", model_type)
+          .AddStringArg("activation", "LEAKYRELU")
+          .AddFloatArg("leakyrelu_coefficient", 0.1)
          .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
           .Finalize(net.NewOperatorDef());
     } else {
mace/ops/depthwise_conv2d.cc

@@ -49,10 +49,13 @@ class DepthwiseConv2dOpBase : public ConvPool2dOpBase {
         activation_(ops::StringToActivationType(
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
-        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)) {}
 
  protected:
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
 };
 
 template <DeviceType D, class T>
@@ -218,7 +221,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, float> : public DepthwiseConv2dOpBase {
     }
 
     DoActivation(output_data, output_data, output->size(), activation_,
-                 relux_max_limit_);
+                 relux_max_limit_, leakyrelu_coefficient_);
 
     return MaceStatus::MACE_SUCCESS;
   }
@@ -524,7 +527,7 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
     return kernel_->Compute(context, input, filter, bias, strides_.data(),
                             padding_type_, paddings_, dilations_.data(),
-                            activation_, relux_max_limit_, output);
+                            activation_, relux_max_limit_,
+                            leakyrelu_coefficient_, output);
   }
 
  private:
mace/ops/depthwise_conv2d_test.cc

@@ -244,10 +244,10 @@ void TestNxNS12(const index_t height, const index_t width) {
   net.AddRandomInput<DeviceType::GPU, float>("Input",
                                              {batch, height, width, channel});
   net.AddRandomInput<DeviceType::GPU, float>(
-      "Filter", {multiplier, channel, kernel_h, kernel_w}, true);
+      "Filter", {multiplier, channel, kernel_h, kernel_w}, true, false);
   net.AddRandomInput<DeviceType::GPU, float>("Bias", {multiplier * channel},
-                                             true);
+                                             true, false);
   net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
                                                   NCHW);
@@ -260,8 +260,8 @@ void TestNxNS12(const index_t height, const index_t width) {
       .AddIntArg("padding", type)
       .AddIntsArg("dilations", {1, 1})
       .AddIntArg("T", static_cast<int>(DataTypeToEnum<float>::value))
-      .AddStringArg("activation", "RELUX")
-      .AddFloatArg("max_limit", 6.0)
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1)
       .Finalize(net.NewOperatorDef());
 
   // Run on cpu
@@ -283,8 +283,8 @@ void TestNxNS12(const index_t height, const index_t width) {
       .AddIntArg("padding", type)
       .AddIntsArg("dilations", {1, 1})
       .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
-      .AddStringArg("activation", "RELUX")
-      .AddFloatArg("max_limit", 6.0)
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1)
       .Finalize(net.NewOperatorDef());
 
   net.RunOp(DeviceType::GPU);
mace/ops/depthwise_deconv2d.cc

@@ -281,7 +281,8 @@ class DepthwiseDeconv2dOp<DeviceType::CPU, float>
                  output_data,
                  output->size(),
                  activation_,
-                 relux_max_limit_);
+                 relux_max_limit_,
+                 leakyrelu_coefficient_);
 
     return MaceStatus::MACE_SUCCESS;
   }
@@ -458,6 +459,7 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
                             group_,
                             activation_,
                             relux_max_limit_,
+                            leakyrelu_coefficient_,
                             out_shape,
                             output);
   }
mace/ops/depthwise_deconv2d_test.cc

@@ -185,12 +185,13 @@ void RandomTest(index_t batch,
   GenerateRandomRealTypeData({multiplier, channel, kernel, kernel},
                              &filter_data);
   net.AddInputFromArray<DeviceType::GPU, float>(
-      "Filter", {multiplier, channel, kernel, kernel}, filter_data, true);
+      "Filter", {multiplier, channel, kernel, kernel}, filter_data, true,
+      false);
   std::vector<float> bias_data(channel * multiplier);
   GenerateRandomRealTypeData({channel * multiplier}, &bias_data);
   net.AddInputFromArray<DeviceType::GPU, float>("Bias", {channel * multiplier},
-                                                bias_data, true);
+                                                bias_data, true, false);
   net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
                                                   NCHW);
@@ -203,6 +204,8 @@ void RandomTest(index_t batch,
       .AddIntsArg("padding_values", {padding, padding})
       .AddIntArg("group", channel)
      .AddIntsArg("dilations", {1, 1})
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1f)
       .AddIntArg("T", static_cast<int>(DataTypeToEnum<float>::value))
       .Finalize(net.NewOperatorDef());
 
   // Run
@@ -224,6 +227,8 @@ void RandomTest(index_t batch,
       .AddIntsArg("strides", {stride, stride})
       .AddIntsArg("padding_values", {padding, padding})
       .AddIntArg("group", channel)
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1f)
       .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
       .Finalize(net.NewOperatorDef());
mace/ops/fully_connected.cc

@@ -41,10 +41,13 @@ class FullyConnectedOpBase : public Operation {
         activation_(ops::StringToActivationType(
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
-        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+        relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+            "leakyrelu_coefficient", 0.0f)) {}
 
  protected:
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
 
   MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS);
   MACE_OP_OUTPUT_TAGS(OUTPUT);
@@ -104,7 +107,7 @@ class FullyConnectedOp<DeviceType::CPU, float> : public FullyConnectedOpBase {
     }
 
     DoActivation(output_ptr, output_ptr, output->size(), activation_,
-                 relux_max_limit_);
+                 relux_max_limit_, leakyrelu_coefficient_);
 
     return MaceStatus::MACE_SUCCESS;
   }
@@ -226,7 +229,8 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
         "The shape of Weight: ", MakeString(weight->shape()),
         " don't match.");
     return kernel_->Compute(
-        context, input, weight, bias, activation_, relux_max_limit_, output);
+        context, input, weight, bias, activation_, relux_max_limit_,
+        leakyrelu_coefficient_, output);
   }
 
  private:
mace/ops/fully_connected_test.cc

@@ -123,10 +123,11 @@ void Random(const index_t batch,
   // Add input data
   net.AddRandomInput<DeviceType::GPU, float>(
-      "Input", {batch, height, width, channels});
+      "Input", {batch, height, width, channels}, false, false);
   net.AddRandomInput<DeviceType::GPU, float>(
-      "Weight", {out_channel, channels, height, width}, true);
-  net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel}, true);
+      "Weight", {out_channel, channels, height, width}, true, false);
+  net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel}, true,
+                                             false);
   net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
                                                   NCHW);
@@ -135,6 +136,8 @@ void Random(const index_t batch,
       .Input("Weight")
       .Input("Bias")
       .Output("OutputNCHW")
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1f)
       .Finalize(net.NewOperatorDef());
 
   // run cpu
@@ -152,6 +155,8 @@ void Random(const index_t batch,
       .Input("Weight")
       .Input("Bias")
       .Output("Output")
+      .AddStringArg("activation", "LEAKYRELU")
+      .AddFloatArg("leakyrelu_coefficient", 0.1f)
       .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
       .Finalize(net.NewOperatorDef());
mace/ops/opencl/buffer/conv_2d.h

@@ -38,6 +38,7 @@ extern MaceStatus Conv2d1x1(OpContext *context,
                             const DataType dt,
                             const ActivationType activation,
                             const float relux_max_limit,
+                            const float leakyrelu_coefficient,
                             const bool input_changed,
                             Tensor *output,
                             StatsFuture *future);
@@ -52,6 +53,7 @@ extern MaceStatus Conv2dGeneral(OpContext *context,
                                 const DataType dt,
                                 const ActivationType activation,
                                 const float relux_max_limit,
+                                const float leakyrelu_coefficient,
                                 const bool input_changed,
                                 Tensor *output,
                                 StatsFuture *future);
@@ -81,6 +83,7 @@ class Conv2dKernel : public OpenCLConv2dKernel {
                      const int *dilations,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      const int winograd_blk_size,
                      Tensor *output) override;
@@ -120,6 +123,7 @@ MaceStatus Conv2dKernel<T>::Compute(
     const int *dilations,
     const ActivationType activation,
     const float relux_max_limit,
+    const float leakyrelu_coefficient,
    const int winograd_blk_size,
     Tensor *output) {
   MACE_UNUSED(winograd_blk_size);
@@ -221,14 +225,14 @@ MaceStatus Conv2dKernel<T>::Compute(
       return conv2d::Conv2d1x1(
           context, &kernels_[1], pad_input, filter, bias, strides,
           DataTypeToEnum<T>::v(), activation, relux_max_limit,
-          input_changed, output, &conv_future);
+          leakyrelu_coefficient, input_changed, output, &conv_future);
     };
   } else {
     conv_func = [&](const Tensor *pad_input, Tensor *output) -> MaceStatus {
       return conv2d::Conv2dGeneral(
           context, &kernels_[1], pad_input, filter, bias, strides, dilations,
           DataTypeToEnum<T>::v(), activation, relux_max_limit,
-          input_changed, output, &conv_future);
+          leakyrelu_coefficient, input_changed, output, &conv_future);
     };
   }
 
   MACE_RETURN_IF_ERROR(conv_func(padded_input_ptr, output));
mace/ops/opencl/buffer/conv_2d_1x1.cc

@@ -32,6 +32,7 @@ MaceStatus Conv2d1x1(OpContext *context,
                      const DataType dt,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      const bool input_changed,
                      Tensor *output,
                      StatsFuture *future) {
@@ -71,6 +72,9 @@ MaceStatus Conv2d1x1(OpContext *context,
       case SIGMOID:
         built_options.emplace("-DUSE_SIGMOID");
         break;
+      case LEAKYRELU:
+        built_options.emplace("-DUSE_LEAKYRELU");
+        break;
       default:
         LOG(FATAL) << "Unknown activation type: " << activation;
     }
@@ -106,6 +110,7 @@ MaceStatus Conv2d1x1(OpContext *context,
     kernel->setArg(idx++, strides[0]);
     kernel->setArg(idx++, strides[1]);
     kernel->setArg(idx++, relux_max_limit);
+    kernel->setArg(idx++, leakyrelu_coefficient);
     kernel->setArg(idx++, *(output->opencl_buffer()));
   }
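The same host-side pattern repeats in every OpenCL host file below: the activation enum selects a -DUSE_* build option so each .cl kernel compiles in exactly one activation branch, and the coefficient is appended to the kernel's positional argument list. A hedged C++ sketch of just the enum-to-flag mapping (illustrative; enum values abbreviated, not MACE's exact declarations):

#include <set>
#include <string>

enum ActivationType { NOOP, RELU, RELUX, PRELU, TANH, SIGMOID, LEAKYRELU };

// Map the requested activation to an OpenCL compiler define, mirroring the
// switch statements in the host files of this commit.
std::set<std::string> ActivationBuildOptions(ActivationType activation) {
  std::set<std::string> built_options;
  switch (activation) {
    case NOOP:      break;
    case RELU:      built_options.emplace("-DUSE_RELU");      break;
    case RELUX:     built_options.emplace("-DUSE_RELUX");     break;
    case PRELU:     built_options.emplace("-DUSE_PRELU");     break;
    case TANH:      built_options.emplace("-DUSE_TANH");      break;
    case SIGMOID:   built_options.emplace("-DUSE_SIGMOID");   break;
    case LEAKYRELU: built_options.emplace("-DUSE_LEAKYRELU"); break;  // new in this commit
  }
  return built_options;
}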
mace/ops/opencl/buffer/conv_2d_general.cc

@@ -33,6 +33,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
                          const DataType dt,
                          const ActivationType activation,
                          const float relux_max_limit,
+                         const float leakyrelu_coefficient,
                          const bool input_changed,
                          Tensor *output,
                          StatsFuture *future) {
@@ -76,6 +77,9 @@ MaceStatus Conv2dGeneral(OpContext *context,
       case SIGMOID:
         built_options.emplace("-DUSE_SIGMOID");
         break;
+      case LEAKYRELU:
+        built_options.emplace("-DUSE_LEAKYRELU");
+        break;
       default:
         LOG(FATAL) << "Unknown activation type: " << activation;
     }
@@ -120,6 +124,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
     kernel->setArg(idx++, static_cast<int32_t>(dilations[1] * in_channel));
     kernel->setArg(idx++, relux_max_limit);
+    kernel->setArg(idx++, leakyrelu_coefficient);
     kernel->setArg(idx++, *(output->opencl_buffer()));
   }
mace/ops/opencl/buffer/depthwise_conv2d.cc

@@ -33,6 +33,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
                            const DataType dt,
                            const ActivationType activation,
                            const float relux_max_limit,
+                           const float leakyrelu_coefficient,
                            const bool input_changed,
                            Tensor *output,
                            StatsFuture *future) {
@@ -76,6 +77,9 @@ MaceStatus DepthwiseConv2d(OpContext *context,
       case SIGMOID:
         built_options.emplace("-DUSE_SIGMOID");
         break;
+      case LEAKYRELU:
+        built_options.emplace("-DUSE_LEAKYRELU");
+        break;
       default:
         LOG(FATAL) << "Unknown activation type: " << activation;
     }
@@ -116,6 +120,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
     kernel->setArg(idx++, static_cast<int32_t>(dilations[1] * in_channel));
     kernel->setArg(idx++, relux_max_limit);
+    kernel->setArg(idx++, leakyrelu_coefficient);
     kernel->setArg(idx++, *(output->opencl_buffer()));
   }
mace/ops/opencl/buffer/depthwise_conv2d.h

@@ -39,6 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
                            const DataType dt,
                            const ActivationType activation,
                            const float relux_max_limit,
+                           const float leakyrelu_coefficient,
                            const bool input_changed,
                            Tensor *output,
                            StatsFuture *future);
@@ -60,6 +61,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel {
                      const int *dilations,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      Tensor *output) override;

 private:
@@ -81,6 +83,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
     const int *dilations,
     const ActivationType activation,
     const float relux_max_limit,
+    const float leakyrelu_coefficient,
     Tensor *output) {
   StatsFuture pad_future, dw_conv_future;
   index_t filter_w = filter->dim(3);
@@ -175,7 +178,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
       depthwise::DepthwiseConv2d(
           context, &kernels_[1], padded_input_ptr, filter, bias, strides,
           dilations, DataTypeToEnum<T>::v(), activation, relux_max_limit,
-          input_changed, output, &dw_conv_future));
+          leakyrelu_coefficient, input_changed, output, &dw_conv_future));
   MergeMultipleFutureWaitFn({pad_future, dw_conv_future}, context->future());
   return MaceStatus::MACE_SUCCESS;
 }
mace/ops/opencl/cl/activation.cl

@@ -7,6 +7,7 @@ __kernel void activation(OUT_OF_RANGE_PARAMS
                          __read_only image2d_t alpha,
 #endif
                          __private const float relux_max_limit,
+                         __private const float leakyrelu_coefficient,
                          __write_only image2d_t output) {
   const int ch_blk = get_global_id(0);
   const int w = get_global_id(1);
@@ -24,9 +25,9 @@ __kernel void activation(OUT_OF_RANGE_PARAMS
   DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
 #ifdef USE_PRELU
   DATA_TYPE4 prelu_alpha = READ_IMAGET(alpha, SAMPLER, (int2)(ch_blk, 0));
-  DATA_TYPE4 out = do_activation(in, prelu_alpha, relux_max_limit);
+  DATA_TYPE4 out = do_activation(in, prelu_alpha, relux_max_limit, leakyrelu_coefficient);
 #else
-  DATA_TYPE4 out = do_activation(in, relux_max_limit);
+  DATA_TYPE4 out = do_activation(in, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(pos, hb), out);
mace/ops/opencl/cl/batch_norm.cl

@@ -11,7 +11,8 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
                          __private const float epsilon,
 #endif
                          __write_only image2d_t output,
-                         __private const float relux_max_limit) {
+                         __private const float relux_max_limit,
+                         __private const float leakyrelu_coefficient) {
   const int ch_blk = get_global_id(0);
   const int w = get_global_id(1);
   const int hb = get_global_id(2);
@@ -43,8 +44,8 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
   DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
   DATA_TYPE4 out = mad(in, bn_scale, bn_offset);
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out = do_activation(out, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out = do_activation(out, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(pos, hb), out);
mace/ops/opencl/cl/common.h

@@ -86,7 +86,8 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
 #ifdef USE_PRELU
                                 DATA_TYPE4 prelu_alpha,
 #endif
-                                __private const float relux_max_limit) {
+                                __private const float relux_max_limit,
+                                __private const float leakyrelu_coefficient) {
   DATA_TYPE4 out;
 #ifdef USE_RELU
   out = fmax(in, (DATA_TYPE)0);
@@ -104,7 +105,7 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
   out = do_sigmoid(in);
 #endif
 #ifdef USE_LEAKYRELU
-  out = fmax(in, (DATA_TYPE)0) * relux_max_limit;
+  out = select(leakyrelu_coefficient * in, in, in >= (DATA_TYPE)0);
 #endif
   return out;
 }
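This hunk also fixes a real bug: the old USE_LEAKYRELU branch computed fmax(in, 0) * relux_max_limit, which zeroes the negative half and scales the positive half. The replacement uses OpenCL's select(a, b, c), which picks b where the condition holds (for vector types, where each component's most-significant bit is set), giving a branch-free per-component leaky ReLU. A scalar C++ analogue for intuition (my sketch, not part of the commit):

// Scalar analogue of: out = select(leakyrelu_coefficient * in, in, in >= 0);
float LeakyReluSelect(float in, float leakyrelu_coefficient) {
  return (in >= 0.0f) ? in : leakyrelu_coefficient * in;
}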
mace/ops/opencl/cl/conv_2d.cl

@@ -9,6 +9,7 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
 #endif
                       __write_only image2d_t output,
                       __private const float relux_max_limit,
+                      __private const float leakyrelu_coefficient,
                       __private const int in_height,
                       __private const int in_width,
                       __private const int in_ch_blks,
@@ -123,11 +124,11 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   const int out_x_base = mul24(out_ch_blk, out_width);
mace/ops/opencl/cl/conv_2d_1x1.cl

@@ -9,6 +9,7 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
 #endif
                           __write_only image2d_t output,
                           __private const float relux_max_limit,
+                          __private const float leakyrelu_coefficient,
                           __private const int in_height,
                           __private const int in_width,
                           __private const int in_ch_blks,
@@ -96,11 +97,11 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
     filter_x_base += 4;
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   const int out_x_base = mul24(out_ch_blk, width);
mace/ops/opencl/cl/conv_2d_1x1_buffer.cl

@@ -17,6 +17,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
                      __private const int stride_h,
                      __private const int stride_w,
                      __private const float relux_max_limit,
+                     __private const float leakyrelu_coefficient,
                      __global OUT_DATA_TYPE *output) {
   const int out_wc_blk_idx = get_global_id(0);
   const int out_hb_idx = get_global_id(1);
@@ -79,9 +80,9 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
     in_offset += 4;
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
mace/ops/opencl/cl/conv_2d_3x3.cl

@@ -9,6 +9,7 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
 #endif
                           __write_only image2d_t output,
                           __private const float relux_max_limit,
+                          __private const float leakyrelu_coefficient,
                           __private const int in_height,
                           __private const int in_width,
                           __private const int in_ch_blks,
@@ -128,12 +129,12 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
-  out4 = do_activation(out4, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+  out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   const int out_x_base = mul24(out_ch_blk, out_width);
mace/ops/opencl/cl/conv_2d_buffer.cl

@@ -22,6 +22,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
                      __private const int dilated_h_offset,
                      __private const int dilated_w_offset,
                      __private const float relux_max_limit,
+                     __private const float leakyrelu_coefficient,
                      __global OUT_DATA_TYPE *output) {
   const int out_wc_blk_idx = get_global_id(0);
   const int out_hb_idx = get_global_id(1);
@@ -107,11 +108,11 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
mace/ops/opencl/cl/deconv_2d.cl

@@ -9,6 +9,7 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
 #endif
                         __write_only image2d_t output,
                         __private const float relux_max_limit,
+                        __private const float leakyrelu_coefficient,
                         __private const int in_height,
                         __private const int in_width,
                         __private const int in_channels,
@@ -127,12 +128,12 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
-  out4 = do_activation(out4, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+  out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   int2 out_pos;
mace/ops/opencl/cl/depthwise_conv2d.cl

@@ -10,6 +10,7 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
 #endif
                                __write_only image2d_t output,
                                __private const float relux_max_limit,
+                               __private const float leakyrelu_coefficient,
                                __private const short in_height,
                                __private const short in_width,
                                __private const short in_ch_blks,
@@ -112,11 +113,11 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
     in_hb_idx += dilation_h;
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   const short out_x_base = mul24(out_ch_blk, out_width);
@@ -145,6 +146,7 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
 #endif
                                   __write_only image2d_t output,
                                   __private const DATA_TYPE relux_max_limit,
+                                  __private const DATA_TYPE leakyrelu_coefficient,
                                   __private const short in_height,
                                   __private const short in_width,
                                   __private const short in_ch_blks,
@@ -238,11 +240,11 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
     in_hb_idx += 1;
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   const short out_x_base = mul24(out_ch_blk, out_width);
mace/ops/opencl/cl/depthwise_conv2d_buffer.cl

@@ -22,6 +22,7 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
                                __private const int dilated_h_offset,
                                __private const int dilated_w_offset,
                                __private const float relux_max_limit,
+                               __private const float leakyrelu_coefficient,
                                __global OUT_DATA_TYPE *output) {
   const int out_wc_blk_idx = get_global_id(0);
   const int out_hb_idx = get_global_id(1);
@@ -85,11 +86,11 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
mace/ops/opencl/cl/depthwise_deconv2d.cl

@@ -9,6 +9,7 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
 #endif
                                  __write_only image2d_t output,
                                  __private const float relux_max_limit,
+                                 __private const float leakyrelu_coefficient,
                                  __private const int in_height,
                                  __private const int in_width,
                                  __private const int out_height,
@@ -108,12 +109,12 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit);
-  out1 = do_activation(out1, relux_max_limit);
-  out2 = do_activation(out2, relux_max_limit);
-  out3 = do_activation(out3, relux_max_limit);
-  out4 = do_activation(out4, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+  out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
 #endif
@@ -146,4 +147,4 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
     out_pos.x += stride_w;
     WRITE_IMAGET(output, out_pos, out4);
   }
-}
\ No newline at end of file
+}
mace/ops/opencl/cl/fully_connected.cl

@@ -12,7 +12,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
                               __private const int input_height,
                               __private const int input_width,
                               __private const int input_channel,
-                              __private const float relux_max_limit) {
+                              __private const float relux_max_limit,
+                              __private const float leakyrelu_coefficient) {
   const int batch_idx = get_global_id(0);
   const int out_blk_idx = get_global_id(1);
   const int input_chan_blk = (input_channel + 3) >> 2;
@@ -56,8 +57,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
     input_coord.y++;
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  result = do_activation(result, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
@@ -77,7 +78,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
                                     __private const int input_width,
                                     __private const int in_chan_blks,
                                     __private const int out_blks,
-                                    __private const float relux_max_limit) {
+                                    __private const float relux_max_limit,
+                                    __private const float leakyrelu_coefficient) {
   const int inter_out_idx = get_global_id(0);
   const int width_blk_idx = get_global_id(1);
   const int width_blk_count = global_size_dim1;
@@ -147,8 +149,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
     inter_idx += 4;
   }
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  result = do_activation(result, relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
mace/ops/opencl/cl/winograd_transform.cl

@@ -127,7 +127,8 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
                                              __private const int out_width,
                                              __private const int round_hw,
                                              __private const int round_w,
-                                             __private const float relux_max_limit) {
+                                             __private const float relux_max_limit,
+                                             __private const float leakyrelu_coefficient) {
   const int width_idx = get_global_id(0);
   const int height_idx = get_global_id(1);
@@ -203,11 +204,11 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
 #endif
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  in0[0] = do_activation(in0[0], relux_max_limit);
-  in0[1] = do_activation(in0[1], relux_max_limit);
-  in1[0] = do_activation(in1[0], relux_max_limit);
-  in1[1] = do_activation(in1[1], relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  in0[0] = do_activation(in0[0], relux_max_limit, leakyrelu_coefficient);
+  in0[1] = do_activation(in0[1], relux_max_limit, leakyrelu_coefficient);
+  in1[0] = do_activation(in1[0], relux_max_limit, leakyrelu_coefficient);
+  in1[1] = do_activation(in1[1], relux_max_limit, leakyrelu_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]);
@@ -395,7 +396,8 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
                                              __private const int out_width,
                                              __private const int round_hw,
                                              __private const int round_w,
-                                             __private const float relux_max_limit) {
+                                             __private const float relux_max_limit,
+                                             __private const float leakyrelu_coefficient) {
   const int width_idx = get_global_id(0);
   const int height_idx = get_global_id(1);
@@ -515,23 +517,23 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
   out3[3] += bias_value;
 #endif
 
-#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0[0] = do_activation(out0[0], relux_max_limit);
-  out0[1] = do_activation(out0[1], relux_max_limit);
-  out0[2] = do_activation(out0[2], relux_max_limit);
-  out0[3] = do_activation(out0[3], relux_max_limit);
-  out1[0] = do_activation(out1[0], relux_max_limit);
-  out1[1] = do_activation(out1[1], relux_max_limit);
-  out1[2] = do_activation(out1[2], relux_max_limit);
-  out1[3] = do_activation(out1[3], relux_max_limit);
-  out2[0] = do_activation(out2[0], relux_max_limit);
-  out2[1] = do_activation(out2[1], relux_max_limit);
-  out2[2] = do_activation(out2[2], relux_max_limit);
-  out2[3] = do_activation(out2[3], relux_max_limit);
-  out3[0] = do_activation(out3[0], relux_max_limit);
-  out3[1] = do_activation(out3[1], relux_max_limit);
-  out3[2] = do_activation(out3[2], relux_max_limit);
-  out3[3] = do_activation(out3[3], relux_max_limit);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+  out0[0] = do_activation(out0[0], relux_max_limit, leakyrelu_coefficient);
+  out0[1] = do_activation(out0[1], relux_max_limit, leakyrelu_coefficient);
+  out0[2] = do_activation(out0[2], relux_max_limit, leakyrelu_coefficient);
+  out0[3] = do_activation(out0[3], relux_max_limit, leakyrelu_coefficient);
+  out1[0] = do_activation(out1[0], relux_max_limit, leakyrelu_coefficient);
+  out1[1] = do_activation(out1[1], relux_max_limit, leakyrelu_coefficient);
+  out1[2] = do_activation(out1[2], relux_max_limit, leakyrelu_coefficient);
+  out1[3] = do_activation(out1[3], relux_max_limit, leakyrelu_coefficient);
+  out2[0] = do_activation(out2[0], relux_max_limit, leakyrelu_coefficient);
+  out2[1] = do_activation(out2[1], relux_max_limit, leakyrelu_coefficient);
+  out2[2] = do_activation(out2[2], relux_max_limit, leakyrelu_coefficient);
+  out2[3] = do_activation(out2[3], relux_max_limit, leakyrelu_coefficient);
+  out3[0] = do_activation(out3[0], relux_max_limit, leakyrelu_coefficient);
+  out3[1] = do_activation(out3[1], relux_max_limit, leakyrelu_coefficient);
+  out3[2] = do_activation(out3[2], relux_max_limit, leakyrelu_coefficient);
+  out3[3] = do_activation(out3[3], relux_max_limit, leakyrelu_coefficient);
 #endif
 
   const int num = min(4, out_width - out_width_idx);
@@ -556,4 +558,4 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
   for (int i = 0; i < num; ++i) {
     WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 3), out3[i]);
   }
-}
\ No newline at end of file
+}
mace/ops/opencl/conv_2d.h

@@ -45,6 +45,7 @@ class OpenCLConv2dKernel {
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              const int winograd_blk_size,
                              Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConv2dKernel);
mace/ops/opencl/deconv_2d.h

@@ -36,6 +36,7 @@ class OpenCLDeconv2dKernel {
                              const int *padding_data,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              const std::vector<index_t> &output_shape,
                              Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDeconv2dKernel);
mace/ops/opencl/depthwise_conv2d.h

@@ -38,6 +38,7 @@ class OpenCLDepthwiseConv2dKernel {
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseConv2dKernel);
 };
mace/ops/opencl/depthwise_deconv2d.h

@@ -39,6 +39,7 @@ class OpenCLDepthwiseDeconv2dKernel {
                              const int group,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              const std::vector<index_t> &output_shape,
                              Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseDeconv2dKernel);
mace/ops/opencl/fully_connected.h
...
...
@@ -35,6 +35,7 @@ class OpenCLFullyConnectedKernel {
                      const Tensor *bias,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLFullyConnectedKernel);
 };
...
...
mace/ops/opencl/image/activation.h
...
...
@@ -35,8 +35,10 @@ template <typename T>
 class ActivationKernel : public OpenCLActivationKernel {
  public:
   ActivationKernel(ActivationType type,
-                   T relux_max_limit)
-      : activation_(type), relux_max_limit_(relux_max_limit) {}
+                   T relux_max_limit,
+                   T leakyrelu_coefficient)
+      : activation_(type), relux_max_limit_(relux_max_limit),
+        leakyrelu_coefficient_(leakyrelu_coefficient) {}
   MaceStatus Compute(OpContext *context,
...
...
@@ -47,6 +49,7 @@ class ActivationKernel : public OpenCLActivationKernel {
  private:
   ActivationType activation_;
   T relux_max_limit_;
+  T leakyrelu_coefficient_;
   cl::Kernel kernel_;
   uint32_t kwg_size_;
   std::vector<index_t> input_shape_;
...
...
@@ -128,6 +131,7 @@ MaceStatus ActivationKernel<T>::Compute(
     kernel_.setArg(idx++, *(alpha->opencl_image()));
   }
   kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
+  kernel_.setArg(idx++, static_cast<float>(leakyrelu_coefficient_));
   kernel_.setArg(idx++, *(output->opencl_image()));
   input_shape_ = input->shape();
...
...
mace/ops/opencl/image/batch_norm.h
...
...
@@ -37,7 +37,8 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
   BatchNormKernel(const float epsilon,
                   const ActivationType activation,
-                  const float relux_max_limit);
+                  const float relux_max_limit,
+                  const float leakyrelu_coefficient);
   MaceStatus Compute(OpContext *context,
                      const Tensor *input,
                      const Tensor *scale,
...
...
@@ -50,6 +51,7 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
   const float epsilon_;
   const ActivationType activation_;
   const float relux_max_limit_;
+  const float leakyrelu_coefficient_;
   cl::Kernel kernel_;
   uint32_t kwg_size_;
   std::vector<index_t> input_shape_;
...
...
@@ -58,10 +60,12 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
 template <typename T>
 BatchNormKernel<T>::BatchNormKernel(const float epsilon,
                                     const ActivationType activation,
-                                    const float relux_max_limit)
+                                    const float relux_max_limit,
+                                    const float leakyrelu_coefficient)
     : epsilon_(epsilon),
       activation_(activation),
-      relux_max_limit_(relux_max_limit) {}
+      relux_max_limit_(relux_max_limit),
+      leakyrelu_coefficient_(leakyrelu_coefficient) {}

 template <typename T>
 MaceStatus BatchNormKernel<T>::Compute(
...
...
@@ -115,6 +119,9 @@ MaceStatus BatchNormKernel<T>::Compute(
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation_;
   }
...
...
@@ -140,6 +147,7 @@ MaceStatus BatchNormKernel<T>::Compute(
   }
   kernel_.setArg(idx++, *(output->opencl_image()));
   kernel_.setArg(idx++, relux_max_limit_);
+  kernel_.setArg(idx++, leakyrelu_coefficient_);
   input_shape_ = input->shape();
 }
...
...
mace/ops/opencl/image/conv_2d.h
...
...
@@ -38,6 +38,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              const DataType dt,
                              std::vector<index_t> *prev_input_shape,
                              Tensor *output,
...
...
@@ -53,6 +54,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              const DataType dt,
                              std::vector<index_t> *prev_input_shape,
                              Tensor *output,
...
...
@@ -68,6 +70,7 @@ extern MaceStatus Conv2d(OpContext *context,
                          const int *dilations,
                          const ActivationType activation,
                          const float relux_max_limit,
+                         const float leakyrelu_coefficient,
                          const DataType dt,
                          std::vector<index_t> *prev_input_shape,
                          Tensor *output,
...
...
@@ -81,6 +84,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
                                        const int *padding,
                                        const ActivationType activation,
                                        const float relux_max_limit,
+                                       const float leakyrelu_coefficient,
                                        const DataType dt,
                                        const int wino_blk_size,
                                        std::vector<index_t> *prev_input_shape,
...
...
@@ -109,6 +113,7 @@ class Conv2dKernel : public OpenCLConv2dKernel {
                      const int *dilations,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      const int wino_blk_size,
                      Tensor *output) override;
...
...
@@ -169,6 +174,7 @@ MaceStatus Conv2dKernel<T>::Compute(
     const int *dilations,
     const ActivationType activation,
     const float relux_max_limit,
+    const float leakyrelu_coefficient,
     const int wino_blk_size,
     Tensor *output) {
   index_t kernel_h = filter->dim(2);
...
...
@@ -217,6 +223,7 @@ MaceStatus Conv2dKernel<T>::Compute(
                              paddings.data(),
                              activation,
                              relux_max_limit,
+                             leakyrelu_coefficient,
                              DataTypeToEnum<T>::value,
                              wino_blk_size,
                              &input_shape_,
...
...
@@ -235,6 +242,7 @@ MaceStatus Conv2dKernel<T>::Compute(
                      dilations,
                      activation,
                      relux_max_limit,
+                     leakyrelu_coefficient,
                      DataTypeToEnum<T>::value,
                      &input_shape_,
                      output,
...
...
@@ -252,6 +260,7 @@ MaceStatus Conv2dKernel<T>::Compute(
                      dilations,
                      activation,
                      relux_max_limit,
+                     leakyrelu_coefficient,
                      DataTypeToEnum<T>::value,
                      &input_shape_,
                      output,
...
...
@@ -269,6 +278,7 @@ MaceStatus Conv2dKernel<T>::Compute(
                  dilations,
                  activation,
                  relux_max_limit,
+                 leakyrelu_coefficient,
                  DataTypeToEnum<T>::value,
                  &input_shape_,
                  output,
...
...
mace/ops/opencl/image/conv_2d_1x1.cc
...
...
@@ -76,6 +76,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              const DataType dt,
                              std::vector<index_t> *prev_input_shape,
                              Tensor *output,
...
...
@@ -125,6 +126,9 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -154,6 +158,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
   kernel->setArg(idx++, *(output->opencl_image()));
   // FIXME handle flexable data type: half not supported
   kernel->setArg(idx++, relux_max_limit);
+  kernel->setArg(idx++, leakyrelu_coefficient);
   kernel->setArg(idx++, static_cast<int>(input_height));
   kernel->setArg(idx++, static_cast<int>(input_width));
   kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
...
...
mace/ops/opencl/image/conv_2d_3x3.cc
...
...
@@ -69,6 +69,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
                              const int *dilations,
                              const ActivationType activation,
                              const float relux_max_limit,
+                             const float leakyrelu_coefficient,
                              const DataType dt,
                              std::vector<index_t> *prev_input_shape,
                              Tensor *output,
...
...
@@ -110,6 +111,9 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -138,6 +142,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
   }
   kernel->setArg(idx++, *(output->opencl_image()));
   kernel->setArg(idx++, relux_max_limit);
+  kernel->setArg(idx++, leakyrelu_coefficient);
   kernel->setArg(idx++, static_cast<int>(input->dim(1)));
   kernel->setArg(idx++, static_cast<int>(input->dim(2)));
   kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
...
...
mace/ops/opencl/image/conv_2d_general.cc
...
...
@@ -77,6 +77,7 @@ extern MaceStatus Conv2d(OpContext *context,
                          const int *dilations,
                          const ActivationType activation,
                          const float relux_max_limit,
+                         const float leakyrelu_coefficient,
                          const DataType dt,
                          std::vector<index_t> *prev_input_shape,
                          Tensor *output,
...
...
@@ -118,6 +119,9 @@ extern MaceStatus Conv2d(OpContext *context,
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -146,6 +150,7 @@ extern MaceStatus Conv2d(OpContext *context,
   }
   kernel->setArg(idx++, *(output->opencl_image()));
   kernel->setArg(idx++, relux_max_limit);
+  kernel->setArg(idx++, leakyrelu_coefficient);
   kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1)));
   kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2)));
   kernel->setArg(idx++, static_cast<uint32_t>(input_channel_blocks));
...
...
mace/ops/opencl/image/deconv_2d.h
...
...
@@ -42,6 +42,7 @@ class Deconv2dKernel : public OpenCLDeconv2dKernel {
                      const int *padding_data,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      const std::vector<index_t> &output_shape,
                      Tensor *output) override;
...
...
@@ -61,6 +62,7 @@ MaceStatus Deconv2dKernel<T>::Compute(
     const int *padding_data,
     const ActivationType activation,
     const float relux_max_limit,
+    const float leakyrelu_coefficient,
     const std::vector<index_t> &output_shape,
     Tensor *output) {
   std::vector<size_t> output_image_shape;
...
...
@@ -119,6 +121,9 @@ MaceStatus Deconv2dKernel<T>::Compute(
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -146,6 +151,7 @@ MaceStatus Deconv2dKernel<T>::Compute(
   }
   kernel_.setArg(idx++, *(output->opencl_image()));
   kernel_.setArg(idx++, relux_max_limit);
+  kernel_.setArg(idx++, leakyrelu_coefficient);
   kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
   kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
   kernel_.setArg(idx++, static_cast<int32_t>(input->dim(3)));
...
...
mace/ops/opencl/image/depthwise_conv2d.cc
...
...
@@ -73,6 +73,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
                            const int *dilations,
                            const ActivationType activation,
                            const float relux_max_limit,
+                           const float leakyrelu_coefficient,
                            const DataType dt,
                            std::vector<index_t> *prev_input_shape,
                            Tensor *output,
...
...
@@ -126,6 +127,9 @@ MaceStatus DepthwiseConv2d(OpContext *context,
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -159,6 +163,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
   }
   kernel->setArg(idx++, *(output->opencl_image()));
   kernel->setArg(idx++, relux_max_limit);
+  kernel->setArg(idx++, leakyrelu_coefficient);
   kernel->setArg(idx++, static_cast<int16_t>(input_height));
   kernel->setArg(idx++, static_cast<int16_t>(input_width));
   kernel->setArg(idx++, static_cast<int16_t>(input_channel_blocks));
...
...
mace/ops/opencl/image/depthwise_conv2d.h
...
...
@@ -39,6 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
                            const int *dilations,
                            const ActivationType activation,
                            const float relux_max_limit,
+                           const float leakyrelu_coefficient,
                            const DataType dt,
                            std::vector<index_t> *prev_input_shape,
                            Tensor *output,
...
...
@@ -60,6 +61,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel {
                      const int *dilations,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      Tensor *output) override;
private:
...
...
@@ -80,6 +82,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
     const int *dilations,
     const ActivationType activation,
     const float relux_max_limit,
+    const float leakyrelu_coefficient,
     Tensor *output) {
   index_t kernel_h = filter->dim(2);
   index_t kernel_w = filter->dim(3);
...
@@ -118,8 +121,8 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
return
depthwise
::
DepthwiseConv2d
(
context
,
&
kernel_
,
input
,
filter
,
bias
,
strides
[
0
],
paddings
.
data
(),
dilations
,
activation
,
relux_max_limit
,
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
&
kwg_size_
);
dilations
,
activation
,
relux_max_limit
,
leakyrelu_coefficient
,
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
&
kwg_size_
);
}
}
// namespace image
...
...
mace/ops/opencl/image/depthwise_deconv2d.h
...
...
@@ -43,6 +43,7 @@ class DepthwiseDeconv2dKernel : public OpenCLDepthwiseDeconv2dKernel {
                      const int group,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      const std::vector<index_t> &output_shape,
                      Tensor *output) override;
...
...
@@ -63,6 +64,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
     const int group,
     const ActivationType activation,
     const float relux_max_limit,
+    const float leakyrelu_coefficient,
     const std::vector<index_t> &output_shape,
     Tensor *output) {
   const index_t batch = output_shape[0];
...
...
@@ -125,6 +127,9 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -152,6 +157,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
   }
   kernel_.setArg(idx++, *(output->opencl_image()));
   kernel_.setArg(idx++, relux_max_limit);
+  kernel_.setArg(idx++, leakyrelu_coefficient);
   kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
   kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
   kernel_.setArg(idx++, static_cast<int32_t>(height));
...
...
mace/ops/opencl/image/fully_connected.h
...
...
@@ -40,6 +40,7 @@ class FullyConnectedKernel : public OpenCLFullyConnectedKernel {
                      const Tensor *bias,
                      const ActivationType activation,
                      const float relux_max_limit,
+                     const float leakyrelu_coefficient,
                      Tensor *output) override;
private:
...
...
@@ -57,6 +58,7 @@ MaceStatus FullyConnectedKernel<T>::Compute(
     const Tensor *bias,
     const ActivationType activation,
     const float relux_max_limit,
+    const float leakyrelu_coefficient,
     Tensor *output) {
   std::vector<index_t> output_shape = {input->dim(0), 1, 1, weight->dim(0)};
   std::vector<size_t> output_image_shape;
...
...
@@ -98,6 +100,9 @@ MaceStatus FullyConnectedKernel<T>::Compute(
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -148,6 +153,7 @@ MaceStatus FullyConnectedKernel<T>::Compute(
   kernel_.setArg(idx++, static_cast<int>(RoundUpDiv4(input->dim(3))));
   kernel_.setArg(idx++, static_cast<int>(output_blocks));
   kernel_.setArg(idx++, relux_max_limit);
+  kernel_.setArg(idx++, leakyrelu_coefficient);
   input_shape_ = input->shape();
 }
...
...
mace/ops/opencl/image/winograd_conv2d.cc
...
...
@@ -115,6 +115,7 @@ MaceStatus WinogradOutputTransform(OpContext *context,
                                    const int wino_blk_size,
                                    const ActivationType activation,
                                    const float relux_max_limit,
+                                   const float leakyrelu_coefficient,
                                    const bool input_changed,
                                    Tensor *output_tensor,
                                    uint32_t *kwg_size,
...
...
@@ -164,6 +165,9 @@ MaceStatus WinogradOutputTransform(OpContext *context,
     case SIGMOID:
       built_options.emplace("-DUSE_SIGMOID");
       break;
+    case LEAKYRELU:
+      built_options.emplace("-DUSE_LEAKYRELU");
+      break;
     default:
       LOG(FATAL) << "Unknown activation type: " << activation;
   }
...
...
@@ -199,6 +203,7 @@ MaceStatus WinogradOutputTransform(OpContext *context,
     kernel->setArg(idx++, static_cast<uint32_t>(round_h * round_w));
     kernel->setArg(idx++, static_cast<uint32_t>(round_w));
     kernel->setArg(idx++, relux_max_limit);
+    kernel->setArg(idx++, leakyrelu_coefficient);
   }
   const std::vector<uint32_t> lws = {*kwg_size / 8, 8, 0};
   std::string tuning_key =
...
...
@@ -222,6 +227,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
                                        const int *paddings,
                                        const ActivationType activation,
                                        const float relux_max_limit,
+                                       const float leakyrelu_coefficient,
                                        const DataType dt,
                                        const int wino_blk_size,
                                        std::vector<index_t> *prev_input_shape,
...
...
@@ -338,7 +344,8 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
   MACE_RETURN_IF_ERROR(WinogradOutputTransform(
       context, kernels[2], mm_output.get(), bias, dt, round_h, round_w,
-      wino_blk_size, activation, relux_max_limit,
-      input_changed, output, kwg_size[2], &t_output_future))
+      wino_blk_size, activation, relux_max_limit, leakyrelu_coefficient,
+      input_changed, output, kwg_size[2], &t_output_future))
   MergeMultipleFutureWaitFn({t_input_future, mm_future, t_output_future},
                             context->future());
...
...
mace/python/tools/converter_tool/base_converter.py
...
...
@@ -168,6 +168,7 @@ class MaceKeyword(object):
     mace_element_type_str = 'type'
     mace_activation_type_str = 'activation'
     mace_activation_max_limit_str = 'max_limit'
+    mace_activation_leakyrelu_coefficient_str = 'leakyrelu_coefficient'
     mace_resize_size_str = 'size'
     mace_batch_to_space_crops_str = 'crops'
     mace_paddings_str = 'paddings'
...
...
mace/python/tools/converter_tool/caffe_converter.py
...
...
@@ -493,6 +493,14 @@ class CaffeConverter(base_converter.ConverterInterface):
                                       mace_pb2.DT_FLOAT, alpha_data)
             op.input.extend([alpha_tensor_name])
+        negative_slope = caffe_op.layer.relu_param.negative_slope
+        if caffe_op.type == 'ReLU' and negative_slope != 0:
+            param_arg = op.arg.add()
+            param_arg.name = \
+                MaceKeyword.mace_activation_leakyrelu_coefficient_str  # noqa
+            param_arg.f = caffe_op.layer.relu_param.negative_slope
+            type_arg.s = six.b(ActivationType.LEAKYRELU.name)

     def convert_folded_batchnorm(self, caffe_op):
         op = self.convert_general_op(caffe_op)
         op.type = MaceOp.BatchNorm.name
...
...
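The Caffe change encodes a convention rather than a new layer type: a Caffe ReLU layer with a nonzero negative_slope is a leaky ReLU, and its slope becomes the leakyrelu_coefficient carried through the kernels above. A standalone restatement of that rule (hypothetical helper, not the converter's actual structure):

def classify_caffe_relu(layer):
    # Per the conversion above: nonzero negative_slope => leaky ReLU,
    # with the slope reused as the leakyrelu_coefficient op argument.
    slope = layer.relu_param.negative_slope
    if slope != 0:
        return 'LEAKYRELU', slope
    return 'RELU', None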
mace/python/tools/converter_tool/onnx_converter.py
...
...
@@ -286,10 +286,10 @@ class OnnxConverter(base_converter.ConverterInterface):
     activation_type = {
         OnnxOpType.Relu.name: ActivationType.RELU,
+        OnnxOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
         OnnxOpType.PRelu.name: ActivationType.PRELU,
         OnnxOpType.Tanh.name: ActivationType.TANH,
         OnnxOpType.Sigmoid.name: ActivationType.SIGMOID,
-        OnnxOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
     }

     def __init__(self, option, src_model_file):
...
...
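The ONNX side needs no slope convention, since ONNX has a first-class LeakyRelu op type; the dict keys on the ONNX op-type name, so a LeakyRelu node resolves directly (illustrative use of the mapping above):

onnx_node_type = 'LeakyRelu'  # == OnnxOpType.LeakyRelu.name
mace_activation = activation_type[onnx_node_type]
assert mace_activation == ActivationType.LEAKYRELU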
mace/python/tools/converter_tool/transformer.py
...
...
@@ -893,7 +893,9 @@ class Transformer(base_converter.ConverterInterface):
                 op.output[0] = consumer_op.output[0]
                 for arg in consumer_op.arg:
                     if arg.name == MaceKeyword.mace_activation_type_str \
-                            or arg.name == MaceKeyword.mace_activation_max_limit_str:  # noqa
+                            or arg.name == \
+                            MaceKeyword.mace_activation_max_limit_str \
+                            or arg.name == MaceKeyword.mace_activation_leakyrelu_coefficient_str:  # noqa
                         op.arg.extend([arg])
                 self.replace_quantize_info(op, consumer_op)
...
...
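The transformer hunk closes the loop on fusion: when an Activation op is folded into its producer, the leakyrelu_coefficient argument must now travel along with the activation type and max limit, otherwise a fused leaky relu would silently run with a zero slope. A simplified restatement of the folding rule (hypothetical helper over protobuf-style args, not the Transformer's real method):

FOLDED_ACTIVATION_ARGS = (
    'activation',             # MaceKeyword.mace_activation_type_str
    'max_limit',              # MaceKeyword.mace_activation_max_limit_str
    'leakyrelu_coefficient',  # MaceKeyword.mace_activation_leakyrelu_coefficient_str
)

def fold_activation_args(producer_op, activation_op):
    # Copy activation-describing args onto the op absorbing the activation.
    for arg in activation_op.arg:
        if arg.name in FOLDED_ACTIVATION_ARGS:
            producer_op.arg.extend([arg])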