Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
c057b7ec
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c057b7ec
编写于
9月 28, 2018
作者:
L
liutuo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add depthwise deconv
上级
487ad9dc
变更
22
展开全部
隐藏空白更改
内联
并排
Showing
22 changed file
with
3037 addition
and
116 deletion
+3037
-116
mace/ops/arm/common_neon.h
mace/ops/arm/common_neon.h
+70
-0
mace/ops/arm/deconv_2d_neon.h
mace/ops/arm/deconv_2d_neon.h
+1
-46
mace/ops/arm/depthwise_deconv2d_neon.h
mace/ops/arm/depthwise_deconv2d_neon.h
+79
-0
mace/ops/arm/depthwise_deconv2d_neon_3x3.cc
mace/ops/arm/depthwise_deconv2d_neon_3x3.cc
+627
-0
mace/ops/arm/depthwise_deconv2d_neon_4x4.cc
mace/ops/arm/depthwise_deconv2d_neon_4x4.cc
+805
-0
mace/ops/deconv_2d.cc
mace/ops/deconv_2d.cc
+6
-33
mace/ops/deconv_2d.h
mace/ops/deconv_2d.h
+30
-0
mace/ops/depthwise_deconv2d.cc
mace/ops/depthwise_deconv2d.cc
+547
-0
mace/ops/depthwise_deconv2d_benchmark.cc
mace/ops/depthwise_deconv2d_benchmark.cc
+130
-0
mace/ops/depthwise_deconv2d_test.cc
mace/ops/depthwise_deconv2d_test.cc
+286
-0
mace/ops/opencl/cl/deconv_2d.cl
mace/ops/opencl/cl/deconv_2d.cl
+2
-1
mace/ops/opencl/cl/depthwise_deconv2d.cl
mace/ops/opencl/cl/depthwise_deconv2d.cl
+149
-0
mace/ops/opencl/depthwise_deconv2d.h
mace/ops/opencl/depthwise_deconv2d.h
+49
-0
mace/ops/opencl/image/deconv_2d.h
mace/ops/opencl/image/deconv_2d.h
+2
-2
mace/ops/opencl/image/depthwise_deconv2d.h
mace/ops/opencl/image/depthwise_deconv2d.h
+196
-0
mace/ops/ops_registry.cc
mace/ops/ops_registry.cc
+2
-0
mace/python/tools/converter_tool/base_converter.py
mace/python/tools/converter_tool/base_converter.py
+2
-0
mace/python/tools/converter_tool/caffe_converter.py
mace/python/tools/converter_tool/caffe_converter.py
+7
-10
mace/python/tools/converter_tool/shape_inference.py
mace/python/tools/converter_tool/shape_inference.py
+5
-0
mace/python/tools/converter_tool/transformer.py
mace/python/tools/converter_tool/transformer.py
+32
-21
repository/opencl-kernel/opencl_kernel_configure.bzl
repository/opencl-kernel/opencl_kernel_configure.bzl
+1
-0
tools/validate.py
tools/validate.py
+9
-3
未找到文件。
mace/ops/arm/common_neon.h
0 → 100644
浏览文件 @
c057b7ec
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_COMMON_NEON_H_
#define MACE_OPS_ARM_COMMON_NEON_H_
#if defined(MACE_ENABLE_NEON)
#include <arm_neon.h>
#endif
namespace
mace
{
namespace
ops
{
#ifdef MACE_ENABLE_NEON
inline
float32x4_t
neon_vfma_lane_0
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
0
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_low_f32
(
c
),
0
);
#endif
}
inline
float32x4_t
neon_vfma_lane_1
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
1
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_low_f32
(
c
),
1
);
#endif
}
inline
float32x4_t
neon_vfma_lane_2
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
2
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_high_f32
(
c
),
0
);
#endif
}
inline
float32x4_t
neon_vfma_lane_3
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
3
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_high_f32
(
c
),
1
);
#endif
}
#endif
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_ARM_COMMON_NEON_H_
mace/ops/arm/deconv_2d_neon.h
浏览文件 @
c057b7ec
...
@@ -15,11 +15,8 @@
...
@@ -15,11 +15,8 @@
#ifndef MACE_OPS_ARM_DECONV_2D_NEON_H_
#ifndef MACE_OPS_ARM_DECONV_2D_NEON_H_
#define MACE_OPS_ARM_DECONV_2D_NEON_H_
#define MACE_OPS_ARM_DECONV_2D_NEON_H_
#if defined(MACE_ENABLE_NEON)
#include <arm_neon.h>
#endif
#include "mace/core/types.h"
#include "mace/core/types.h"
#include "mace/ops/arm/common_neon.h"
namespace
mace
{
namespace
mace
{
namespace
ops
{
namespace
ops
{
...
@@ -48,48 +45,6 @@ void Deconv2dNeonK4x4S2(const float *input,
...
@@ -48,48 +45,6 @@ void Deconv2dNeonK4x4S2(const float *input,
const
index_t
*
out_shape
,
const
index_t
*
out_shape
,
float
*
output
);
float
*
output
);
#ifdef MACE_ENABLE_NEON
inline
float32x4_t
neon_vfma_lane_0
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
0
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_low_f32
(
c
),
0
);
#endif
}
inline
float32x4_t
neon_vfma_lane_1
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
1
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_low_f32
(
c
),
1
);
#endif
}
inline
float32x4_t
neon_vfma_lane_2
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
2
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_high_f32
(
c
),
0
);
#endif
}
inline
float32x4_t
neon_vfma_lane_3
(
float32x4_t
a
,
float32x4_t
b
,
float32x4_t
c
)
{
#ifdef __aarch64__
return
vfmaq_laneq_f32
(
a
,
b
,
c
,
3
);
#else
return
vmlaq_lane_f32
(
a
,
b
,
vget_high_f32
(
c
),
1
);
#endif
}
#endif
}
// namespace ops
}
// namespace ops
}
// namespace mace
}
// namespace mace
...
...
mace/ops/arm/depthwise_deconv2d_neon.h
0 → 100644
浏览文件 @
c057b7ec
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_DEPTHWISE_DECONV2D_NEON_H_
#define MACE_OPS_ARM_DEPTHWISE_DECONV2D_NEON_H_
#include "mace/core/types.h"
#include "mace/ops/arm/common_neon.h"
namespace
mace
{
namespace
ops
{
void
DepthwiseDeconv2dNeonK3x3S1
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
DepthwiseDeconv2dNeonK3x3S2
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
DepthwiseDeconv2dNeonK4x4S1
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
DepthwiseDeconv2dNeonK4x4S2
(
const
float
*
input
,
const
float
*
filter
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
GroupDeconv2dNeonK3x3S1
(
const
float
*
input
,
const
float
*
filter
,
const
int
group
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
GroupDeconv2dNeonK3x3S2
(
const
float
*
input
,
const
float
*
filter
,
const
int
group
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
GroupDeconv2dNeonK4x4S1
(
const
float
*
input
,
const
float
*
filter
,
const
int
group
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
void
GroupDeconv2dNeonK4x4S2
(
const
float
*
input
,
const
float
*
filter
,
const
int
group
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
float
*
output
);
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_ARM_DEPTHWISE_DECONV2D_NEON_H_
mace/ops/arm/depthwise_deconv2d_neon_3x3.cc
0 → 100644
浏览文件 @
c057b7ec
此差异已折叠。
点击以展开。
mace/ops/arm/depthwise_deconv2d_neon_4x4.cc
0 → 100644
浏览文件 @
c057b7ec
此差异已折叠。
点击以展开。
mace/ops/deconv_2d.cc
浏览文件 @
c057b7ec
...
@@ -359,12 +359,12 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
...
@@ -359,12 +359,12 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
padded_out_shape
.
data
(),
padded_out_shape
.
data
(),
out_data
);
out_data
);
if
(
!
no_pad
)
{
if
(
!
no_pad
)
{
CropPadOut
(
out_data
,
CropPadOut
<
float
>
(
out_data
,
padded_out_shape
.
data
(),
padded_out_shape
.
data
(),
output_shape
.
data
(),
output_shape
.
data
(),
pad_h
,
pad_h
,
pad_w
,
pad_w
,
output_data
);
output_data
);
}
}
if
(
bias_data
!=
nullptr
)
{
if
(
bias_data
!=
nullptr
)
{
...
@@ -445,33 +445,6 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
...
@@ -445,33 +445,6 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
}
}
}
}
}
}
void
CropPadOut
(
const
float
*
input
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
const
index_t
pad_h
,
const
index_t
pad_w
,
float
*
output
)
{
const
index_t
batch
=
in_shape
[
0
];
const
index_t
channel
=
in_shape
[
1
];
const
index_t
in_height
=
in_shape
[
2
];
const
index_t
in_width
=
in_shape
[
3
];
const
index_t
out_height
=
out_shape
[
2
];
const
index_t
out_width
=
out_shape
[
3
];
#pragma omp parallel for collapse(3)
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channel
;
++
j
)
{
for
(
int
k
=
0
;
k
<
out_height
;
++
k
)
{
const
float
*
input_base
=
input
+
((
i
*
channel
+
j
)
*
in_height
+
(
k
+
pad_h
))
*
in_width
;
float
*
output_base
=
output
+
((
i
*
channel
+
j
)
*
out_height
+
k
)
*
out_width
;
memcpy
(
output_base
,
input_base
+
pad_w
,
out_width
*
sizeof
(
float
));
}
}
}
}
};
};
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
...
...
mace/ops/deconv_2d.h
浏览文件 @
c057b7ec
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
#ifndef MACE_OPS_DECONV_2D_H_
#ifndef MACE_OPS_DECONV_2D_H_
#define MACE_OPS_DECONV_2D_H_
#define MACE_OPS_DECONV_2D_H_
#include "mace/core/types.h"
namespace
mace
{
namespace
mace
{
namespace
ops
{
namespace
ops
{
...
@@ -23,6 +25,34 @@ enum FrameworkType {
...
@@ -23,6 +25,34 @@ enum FrameworkType {
CAFFE
=
1
,
CAFFE
=
1
,
};
};
template
<
typename
T
>
void
CropPadOut
(
const
T
*
input
,
const
index_t
*
in_shape
,
const
index_t
*
out_shape
,
const
index_t
pad_h
,
const
index_t
pad_w
,
T
*
output
)
{
const
index_t
batch
=
in_shape
[
0
];
const
index_t
channel
=
in_shape
[
1
];
const
index_t
in_height
=
in_shape
[
2
];
const
index_t
in_width
=
in_shape
[
3
];
const
index_t
out_height
=
out_shape
[
2
];
const
index_t
out_width
=
out_shape
[
3
];
#pragma omp parallel for collapse(3)
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channel
;
++
j
)
{
for
(
int
k
=
0
;
k
<
out_height
;
++
k
)
{
const
T
*
input_base
=
input
+
((
i
*
channel
+
j
)
*
in_height
+
(
k
+
pad_h
))
*
in_width
;
T
*
output_base
=
output
+
((
i
*
channel
+
j
)
*
out_height
+
k
)
*
out_width
;
memcpy
(
output_base
,
input_base
+
pad_w
,
out_width
*
sizeof
(
T
));
}
}
}
}
}
// namespace ops
}
// namespace ops
}
// namespace mace
}
// namespace mace
...
...
mace/ops/depthwise_deconv2d.cc
0 → 100644
浏览文件 @
c057b7ec
此差异已折叠。
点击以展开。
mace/ops/depthwise_deconv2d_benchmark.cc
0 → 100644
浏览文件 @
c057b7ec
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
template
<
DeviceType
D
,
typename
T
>
static
void
DepthwiseDeconv2d
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel_h
,
int
kernel_w
,
int
stride
,
int
padding
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
// Add input data
if
(
D
==
DeviceType
::
CPU
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
}
else
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
}
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
1
,
channels
,
kernel_h
,
kernel_w
});
if
(
D
==
DeviceType
::
GPU
)
{
BufferToImage
<
D
,
T
>
(
&
net
,
"Input"
,
"InputImage"
,
ops
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
&
net
,
"Filter"
,
"FilterImage"
,
ops
::
BufferType
::
DW_CONV2D_FILTER
);
OpDefBuilder
(
"DepthwiseDeconv2d"
,
"DepthwiseDeconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntsArg
(
"padding_values"
,
{
padding
,
padding
})
.
AddIntArg
(
"group"
,
channels
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"DepthwiseDeconv2d"
,
"DepthwiseDeconv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntsArg
(
"padding_values"
,
{
padding
,
padding
})
.
AddIntArg
(
"group"
,
channels
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
float
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
net
.
Setup
(
D
);
// Warm-up
for
(
int
i
=
0
;
i
<
2
;
++
i
)
{
net
.
Run
();
net
.
Sync
();
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
net
.
Run
();
net
.
Sync
();
}
}
// In common network, there are usually more than 1 layers, this is used to
// approximate the amortized latency. The OpenCL runtime for Mali/Adreno is
// in-order.
#define MACE_BM_DEPTHWISE_DECONV2D_MACRO( \
N, C, H, W, KH, KW, S, P, TYPE, DEVICE) \
static void \
MACE_BM_DEPTHWISE_DECONV2D_##N##_##C##_##H##_##W##_##KH##_##KW##_##S##_##P\
##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t macc = \
static_cast<int64_t>(iters) * N * H * W * KH * KW * C; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseDeconv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, S, P); \
} \
MACE_BENCHMARK( \
MACE_BM_DEPTHWISE_DECONV2D_##N##_##C##_##H##_##W##_##KH##_##KW##_##S##_##P\
##_##TYPE##_##DEVICE)
#define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \
MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU); \
MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, GPU); \
MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, half, GPU);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
128
,
15
,
15
,
1
,
1
,
1
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
32
,
60
,
60
,
1
,
1
,
1
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
32
,
60
,
60
,
3
,
3
,
1
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
128
,
60
,
60
,
4
,
4
,
1
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
3
,
224
,
224
,
4
,
4
,
2
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
3
,
512
,
512
,
7
,
7
,
2
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
128
,
16
,
16
,
5
,
5
,
1
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
64
,
32
,
32
,
1
,
1
,
1
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
64
,
33
,
32
,
3
,
3
,
2
,
0
);
MACE_BM_DEPTHWISE_DECONV2D
(
1
,
3
,
224
,
224
,
3
,
3
,
2
,
0
);
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/depthwise_deconv2d_test.cc
0 → 100644
浏览文件 @
c057b7ec
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <vector>
#include "mace/ops/ops_test_util.h"
namespace
mace
{
namespace
ops
{
namespace
test
{
class
DepthwiseDeconv2dOpTest
:
public
OpsTestBase
{};
namespace
{
template
<
DeviceType
D
>
void
RunTestSimple
(
const
int
group
,
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
float
>
&
bias_data
,
const
int
stride
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
index_t
>
&
filter_shape
,
const
std
::
vector
<
float
>
&
filter_data
,
const
std
::
vector
<
index_t
>
&
expected_shape
,
const
std
::
vector
<
float
>
&
expected_data
)
{
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
filter_shape
,
filter_data
);
net
.
TransformDataFormat
<
D
,
float
>
(
"Filter"
,
HWOI
,
"FilterOIHW"
,
OIHW
);
const
index_t
out_channels
=
expected_shape
[
3
];
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
out_channels
},
bias_data
);
if
(
D
==
DeviceType
::
GPU
)
{
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
ops
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
float
>
(
&
net
,
"FilterOIHW"
,
"FilterImage"
,
ops
::
BufferType
::
DW_CONV2D_FILTER
);
BufferToImage
<
D
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
ops
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"DepthwiseDeconv2d"
,
"DepthwiseDeconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"group"
,
group
)
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"Output"
,
ops
::
BufferType
::
IN_OUT_CHANNEL
);
}
else
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"DepthwiseDeconv2d"
,
"DepthwiseDeconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"FilterOIHW"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntArg
(
"group"
,
group
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntsArg
(
"padding_values"
,
paddings
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
}
auto
expected
=
net
.
CreateTensor
<
float
>
(
expected_shape
,
expected_data
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.0001
);
}
template
<
DeviceType
D
>
void
TestNHWCSimple3x3_DW
()
{
RunTestSimple
<
D
>
(
3
,
{
1
,
3
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
0
,
0
,
0
},
1
,
{
0
,
0
},
{
3
,
3
,
1
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
},
{
1
,
5
,
5
,
3
},
{
1
,
1
,
1
,
2
,
2
,
2
,
3
,
3
,
3
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
,
2
,
2
,
2
,
3
,
3
,
3
,
6
,
6
,
6
,
9
,
9
,
9
,
6
,
6
,
6
,
3
,
3
,
3
,
2
,
2
,
2
,
4
,
4
,
4
,
6
,
6
,
6
,
4
,
4
,
4
,
2
,
2
,
2
,
1
,
1
,
1
,
2
,
2
,
2
,
3
,
3
,
3
,
2
,
2
,
2
,
1
,
1
,
1
});
}
template
<
DeviceType
D
>
void
TestNHWCSimple3x3_Group
()
{
RunTestSimple
<
D
>
(
2
,
{
1
,
3
,
3
,
4
},
{
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
},
{
0
,
0
,
0
,
0
,
0
,
0
},
1
,
{
0
,
0
},
{
3
,
3
,
3
,
4
},
{
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
},
{
1
,
5
,
5
,
6
},
{
3
,
6
,
3
,
7
,
14
,
7
,
6
,
12
,
6
,
14
,
28
,
14
,
9
,
18
,
9
,
21
,
42
,
21
,
6
,
12
,
6
,
14
,
28
,
14
,
3
,
6
,
3
,
7
,
14
,
7
,
6
,
12
,
6
,
14
,
28
,
14
,
12
,
24
,
12
,
28
,
56
,
28
,
18
,
36
,
18
,
42
,
84
,
42
,
12
,
24
,
12
,
28
,
56
,
28
,
6
,
12
,
6
,
14
,
28
,
14
,
9
,
18
,
9
,
21
,
42
,
21
,
18
,
36
,
18
,
42
,
84
,
42
,
27
,
54
,
27
,
63
,
126
,
63
,
18
,
36
,
18
,
42
,
84
,
42
,
9
,
18
,
9
,
21
,
42
,
21
,
6
,
12
,
6
,
14
,
28
,
14
,
12
,
24
,
12
,
28
,
56
,
28
,
18
,
36
,
18
,
42
,
84
,
42
,
12
,
24
,
12
,
28
,
56
,
28
,
6
,
12
,
6
,
14
,
28
,
14
,
3
,
6
,
3
,
7
,
14
,
7
,
6
,
12
,
6
,
14
,
28
,
14
,
9
,
18
,
9
,
21
,
42
,
21
,
6
,
12
,
6
,
14
,
28
,
14
,
3
,
6
,
3
,
7
,
14
,
7
});
}
}
// namespace
TEST_F
(
DepthwiseDeconv2dOpTest
,
CPUSimple3X3Depthwise
)
{
TestNHWCSimple3x3_DW
<
DeviceType
::
CPU
>
();
}
TEST_F
(
DepthwiseDeconv2dOpTest
,
CPUSimple3X3Group
)
{
TestNHWCSimple3x3_Group
<
DeviceType
::
CPU
>
();
}
TEST_F
(
DepthwiseDeconv2dOpTest
,
GPUSimple3X3Depthwise
)
{
TestNHWCSimple3x3_DW
<
DeviceType
::
GPU
>
();
}
namespace
{
template
<
typename
T
>
void
RandomTest
(
index_t
batch
,
index_t
channel
,
index_t
height
,
index_t
width
,
index_t
kernel
,
int
stride
,
int
padding
)
{
testing
::
internal
::
LogToStderr
();
// Construct graph
OpsTestNet
net
;
int
multiplier
=
1
;
// Add input data
std
::
vector
<
float
>
input_data
(
batch
*
height
*
width
*
channel
);
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
channel
},
&
input_data
);
net
.
AddInputFromArray
<
DeviceType
::
GPU
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channel
},
input_data
);
std
::
vector
<
float
>
filter_data
(
kernel
*
kernel
*
channel
*
multiplier
);
GenerateRandomRealTypeData
({
multiplier
,
channel
,
kernel
,
kernel
},
&
filter_data
);
net
.
AddInputFromArray
<
DeviceType
::
GPU
,
float
>
(
"Filter"
,
{
multiplier
,
channel
,
kernel
,
kernel
},
filter_data
);
std
::
vector
<
float
>
bias_data
(
channel
*
multiplier
);
GenerateRandomRealTypeData
({
channel
*
multiplier
},
&
bias_data
);
net
.
AddInputFromArray
<
DeviceType
::
GPU
,
float
>
(
"Bias"
,
{
channel
*
multiplier
},
bias_data
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"DepthwiseDeconv2d"
,
"DepthwiseDeconv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntsArg
(
"padding_values"
,
{
padding
,
padding
})
.
AddIntArg
(
"group"
,
channel
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
float
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
DeviceType
::
CPU
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
// Check
auto
expected
=
net
.
CreateTensor
<
float
>
();
expected
->
Copy
(
*
net
.
GetOutput
(
"Output"
));
BufferToImage
<
DeviceType
::
GPU
,
T
>
(
&
net
,
"Input"
,
"InputImage"
,
ops
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
DeviceType
::
GPU
,
T
>
(
&
net
,
"Filter"
,
"FilterImage"
,
ops
::
BufferType
::
DW_CONV2D_FILTER
);
BufferToImage
<
DeviceType
::
GPU
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
ops
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"DepthwiseDeconv2d"
,
"DepthwiseDeconv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntsArg
(
"padding_values"
,
{
padding
,
padding
})
.
AddIntArg
(
"group"
,
channel
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
DeviceType
::
GPU
);
// Transfer output
ImageToBuffer
<
DeviceType
::
GPU
,
float
>
(
&
net
,
"OutputImage"
,
"OPENCLOutput"
,
ops
::
BufferType
::
IN_OUT_CHANNEL
);
if
(
DataTypeToEnum
<
T
>::
value
==
DT_FLOAT
)
{
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-5
);
}
else
{
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
TEST_F
(
DepthwiseDeconv2dOpTest
,
RandomTestFloat
)
{
RandomTest
<
float
>
(
1
,
32
,
256
,
256
,
5
,
1
,
2
);
RandomTest
<
float
>
(
1
,
3
,
256
,
256
,
5
,
1
,
1
);
RandomTest
<
float
>
(
1
,
3
,
256
,
256
,
5
,
2
,
2
);
RandomTest
<
float
>
(
1
,
3
,
256
,
256
,
5
,
1
,
3
);
RandomTest
<
float
>
(
1
,
3
,
256
,
256
,
5
,
2
,
4
);
RandomTest
<
float
>
(
1
,
4
,
256
,
256
,
5
,
1
,
1
);
RandomTest
<
float
>
(
1
,
4
,
256
,
256
,
5
,
2
,
2
);
RandomTest
<
float
>
(
1
,
4
,
256
,
256
,
5
,
1
,
3
);
RandomTest
<
float
>
(
1
,
4
,
256
,
256
,
5
,
2
,
4
);
}
//
TEST_F
(
DepthwiseDeconv2dOpTest
,
RandomTestHalf
)
{
RandomTest
<
half
>
(
1
,
32
,
256
,
256
,
5
,
1
,
2
);
RandomTest
<
half
>
(
1
,
3
,
256
,
256
,
5
,
1
,
1
);
RandomTest
<
half
>
(
1
,
3
,
256
,
256
,
5
,
2
,
2
);
RandomTest
<
half
>
(
1
,
3
,
256
,
256
,
5
,
1
,
3
);
RandomTest
<
half
>
(
1
,
3
,
256
,
256
,
5
,
2
,
4
);
RandomTest
<
half
>
(
1
,
4
,
256
,
256
,
5
,
1
,
1
);
RandomTest
<
half
>
(
1
,
4
,
256
,
256
,
5
,
2
,
2
);
RandomTest
<
half
>
(
1
,
4
,
256
,
256
,
5
,
1
,
3
);
RandomTest
<
half
>
(
1
,
4
,
256
,
256
,
5
,
2
,
4
);
}
}
// namespace
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/ops/opencl/cl/deconv_2d.cl
浏览文件 @
c057b7ec
...
@@ -163,4 +163,5 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
...
@@ -163,4 +163,5 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
out_pos.x
+=
stride_w
;
out_pos.x
+=
stride_w
;
WRITE_IMAGET
(
output,
out_pos,
out4
)
;
WRITE_IMAGET
(
output,
out_pos,
out4
)
;
}
}
}
}
\ No newline at end of file
mace/ops/opencl/cl/depthwise_deconv2d.cl
0 → 100644
浏览文件 @
c057b7ec
#
include
<common.h>
__kernel
void
depthwise_deconv2d
(
OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only
image2d_t
input,
__read_only
image2d_t
weights,
#
ifdef
BIAS
__read_only
image2d_t
bias,
#
endif
__write_only
image2d_t
output,
__private
const
float
relux_max_limit,
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
out_channel,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
float
stride_h_r,
__private
const
float
stride_w_r,
__private
const
int
align_h,
__private
const
int
align_w,
__private
const
int
padding_h,
__private
const
int
padding_w,
__private
const
int
kernel_h,
__private
const
int
kernel_w,
__private
const
int
kernel_size,
__private
const
int
out_channel_blocks
)
{
const
int
c
=
get_global_id
(
0
)
;
const
int
w_id
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
#
ifndef
NON_UNIFORM_WORK_GROUP
if
(
c
>=
global_size_dim0
|
| w_id >= global_size_dim1
|| hb >= global_size_dim2) {
return;
}
#endif
#ifdef BIAS
DATA_TYPE4 out0 =
READ_IMAGET(bias, SAMPLER, (int2)(c, 0));
DATA_TYPE4 out1 = out0;
DATA_TYPE4 out2 = out0;
DATA_TYPE4 out3 = out0;
DATA_TYPE4 out4 = out0;
#else
DATA_TYPE4 out0 = 0;
DATA_TYPE4 out1 = 0;
DATA_TYPE4 out2 = 0;
DATA_TYPE4 out3 = 0;
DATA_TYPE4 out4 = 0;
#endif
const int n_stride = mad(w_id, stride_w_r, 0);
const int mod_stride = w_id - mul24(n_stride, stride_w);
const int w = mad24(mul24(n_stride, 5), stride_w, mod_stride);
const int b = hb / out_height;
const int h = hb - mul24(b, out_height);
if (w < out_width) {
int start_x = floor((float) (w + align_w) * stride_w_r);
int start_y = (h + align_h) * stride_h_r;
start_y = max(0, start_y);
int f_start_x = mad24(start_x, stride_w, padding_w) - w;
int f_start_y = mad24(start_y, stride_h, padding_h) - h;
f_start_x = kernel_w - 1 - f_start_x;
f_start_y = kernel_h - 1 - f_start_y;
int2 in_pos;
int f_pos;
DATA_TYPE4 in0, in1, in2, in3, in4;
DATA_TYPE4 weight;
int idx_w0, idx_w1, idx_w2, idx_w3, idx_w4;
int index_x, index_y;
for (int f_y = f_start_y, idx_h = start_y ; f_y >= 0; f_y -= stride_h, ++idx_h) {
index_y = mad24(b, in_height, idx_h);
in_pos.y = select(index_y, -1, idx_h < 0 || idx_h >= in_height);
for (int f_x = f_start_x, idx_w = start_x; f_x >= 0; f_x -= stride_w, ++idx_w) {
idx_w0 = idx_w;
idx_w1 = idx_w + 1;
idx_w2 = idx_w + 2;
idx_w3 = idx_w + 3;
idx_w4 = idx_w + 4;
#define READ_INPUT(i) \
index_x = mad24(c, in_width, idx_w##i); \
in_pos.x = \
select(index_x, -1, idx_w##i < 0 || idx_w##i >= in_width); \
in##i = READ_IMAGET(input, SAMPLER, in_pos);
READ_INPUT(0);
READ_INPUT(1);
READ_INPUT(2);
READ_INPUT(3);
READ_INPUT(4);
#undef READ_INPUT
f_pos = mad24(f_y, kernel_w, f_x);
weight = READ_IMAGET(weights, SAMPLER, (int2)(f_pos, c));
out0 = mad(in0, weight, out0);
out1 = mad(in1, weight, out1);
out2 = mad(in2, weight, out2);
out3 = mad(in3, weight, out3);
out4 = mad(in4, weight, out4);
}
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) |
|
defined
(
USE_SIGMOID
)
out0
=
do_activation
(
out0,
relux_max_limit
)
;
out1
=
do_activation
(
out1,
relux_max_limit
)
;
out2
=
do_activation
(
out2,
relux_max_limit
)
;
out3
=
do_activation
(
out3,
relux_max_limit
)
;
out4
=
do_activation
(
out4,
relux_max_limit
)
;
#
endif
int2
out_pos
;
out_pos.y
=
hb
;
int
ow
=
w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
=
mad24
(
c,
out_width,
ow
)
;
WRITE_IMAGET
(
output,
out_pos,
out0
)
;
ow
+=
stride_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride_w
;
WRITE_IMAGET
(
output,
out_pos,
out1
)
;
ow
+=
stride_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride_w
;
WRITE_IMAGET
(
output,
out_pos,
out2
)
;
ow
+=
stride_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride_w
;
WRITE_IMAGET
(
output,
out_pos,
out3
)
;
ow
+=
stride_w
;
if
(
ow
>=
out_width
)
return
;
out_pos.x
+=
stride_w
;
WRITE_IMAGET
(
output,
out_pos,
out4
)
;
}
}
\ No newline at end of file
mace/ops/opencl/depthwise_deconv2d.h
0 → 100644
浏览文件 @
c057b7ec
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_DEPTHWISE_DECONV2D_H_
#define MACE_OPS_OPENCL_DEPTHWISE_DECONV2D_H_
#include <vector>
#include "mace/ops/activation.h"
namespace
mace
{
class
OpContext
;
class
Tensor
;
namespace
ops
{
class
OpenCLDepthwiseDeconv2dKernel
{
public:
virtual
MaceStatus
Compute
(
OpContext
*
context
,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
strides
,
const
int
*
padding_data
,
const
int
group
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
output
)
=
0
;
MACE_EMPTY_VIRTUAL_DESTRUCTOR
(
OpenCLDepthwiseDeconv2dKernel
);
};
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_OPENCL_DEPTHWISE_DECONV2D_H_
mace/ops/opencl/image/deconv_2d.h
浏览文件 @
c057b7ec
...
@@ -79,10 +79,10 @@ MaceStatus Deconv2dKernel<T>::Compute(
...
@@ -79,10 +79,10 @@ MaceStatus Deconv2dKernel<T>::Compute(
const
int
stride_h
=
strides
[
0
];
const
int
stride_h
=
strides
[
0
];
const
int
stride_w
=
strides
[
1
];
const
int
stride_w
=
strides
[
1
];
MACE_CHECK
(
stride_w
>
0
&&
stride_h
>
0
,
"strides should be > 0."
);
MACE_CHECK
(
stride_w
>
0
&&
stride_h
>
0
,
"strides should be > 0."
);
#define MACE_WIDTH_BLK 5
const
int
width_tile
=
5
;
const
index_t
n_strides
=
(
width
+
stride_w
-
1
)
/
stride_w
;
const
index_t
n_strides
=
(
width
+
stride_w
-
1
)
/
stride_w
;
const
index_t
width_blocks
=
const
index_t
width_blocks
=
((
n_strides
+
MACE_WIDTH_BLK
-
1
)
/
MACE_WIDTH_BLK
)
*
stride_w
;
((
n_strides
+
width_tile
-
1
)
/
width_tile
)
*
stride_w
;
const
float
stride_h_r
=
1.
f
/
static_cast
<
float
>
(
stride_h
);
const
float
stride_h_r
=
1.
f
/
static_cast
<
float
>
(
stride_h
);
const
float
stride_w_r
=
1.
f
/
static_cast
<
float
>
(
stride_w
);
const
float
stride_w_r
=
1.
f
/
static_cast
<
float
>
(
stride_w
);
const
int
padding_h
=
(
padding_data
[
0
]
+
1
)
>>
1
;
const
int
padding_h
=
(
padding_data
[
0
]
+
1
)
>>
1
;
...
...
mace/ops/opencl/image/depthwise_deconv2d.h
0 → 100644
浏览文件 @
c057b7ec
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_IMAGE_DEPTHWISE_DECONV2D_H_
#define MACE_OPS_OPENCL_IMAGE_DEPTHWISE_DECONV2D_H_
#include "mace/ops/opencl/depthwise_deconv2d.h"
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/opencl/helper.h"
namespace
mace
{
namespace
ops
{
namespace
opencl
{
namespace
image
{
template
<
typename
T
>
class
DepthwiseDeconv2dKernel
:
public
OpenCLDepthwiseDeconv2dKernel
{
public:
MaceStatus
Compute
(
OpContext
*
context
,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
strides
,
const
int
*
padding_data
,
const
int
group
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
output
)
override
;
private:
cl
::
Kernel
kernel_
;
uint32_t
kwg_size_
;
std
::
vector
<
index_t
>
input_shape_
;
};
template
<
typename
T
>
MaceStatus
DepthwiseDeconv2dKernel
<
T
>::
Compute
(
OpContext
*
context
,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
strides
,
const
int
*
padding_data
,
const
int
group
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
output
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
height
=
output_shape
[
1
];
const
index_t
width
=
output_shape
[
2
];
const
index_t
channels
=
output_shape
[
3
];
const
index_t
input_channels
=
input
->
dim
(
3
);
const
index_t
multiplier
=
filter
->
dim
(
0
);
MACE_CHECK
(
group
==
channels
&&
group
==
input_channels
&&
multiplier
==
1
,
"opencl image deconv only supports depthwise type group."
);
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
output_image_shape
));
const
DataType
dt
=
DataTypeToEnum
<
T
>::
value
;
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
int
stride_h
=
strides
[
0
];
const
int
stride_w
=
strides
[
1
];
MACE_CHECK
(
stride_w
>
0
&&
stride_h
>
0
,
"strides should be > 0."
);
const
int
width_tile
=
5
;
const
index_t
n_strides
=
(
width
+
stride_w
-
1
)
/
stride_w
;
const
index_t
width_blocks
=
((
n_strides
+
width_tile
-
1
)
/
width_tile
)
*
stride_w
;
const
float
stride_h_r
=
1.
f
/
static_cast
<
float
>
(
stride_h
);
const
float
stride_w_r
=
1.
f
/
static_cast
<
float
>
(
stride_w
);
const
int
padding_h
=
(
padding_data
[
0
]
+
1
)
>>
1
;
const
int
padding_w
=
(
padding_data
[
1
]
+
1
)
>>
1
;
const
int
align_h
=
stride_h
-
1
-
padding_h
;
const
int
align_w
=
stride_w
-
1
-
padding_w
;
const
int
kernel_size
=
filter
->
dim
(
2
)
*
filter
->
dim
(
3
);
auto
runtime
=
context
->
device
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
set
<
std
::
string
>
built_options
;
MACE_OUT_OF_RANGE_CONFIG
;
MACE_NON_UNIFORM_WG_CONFIG
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"depthwise_deconv2d"
);
built_options
.
emplace
(
"-Ddepthwise_deconv2d="
+
kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpCompatibleCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpCompatibleCLCMDDt
(
dt
));
built_options
.
emplace
(
bias
!=
nullptr
?
"-DBIAS"
:
""
);
switch
(
activation
)
{
case
NOOP
:
break
;
case
RELU
:
built_options
.
emplace
(
"-DUSE_RELU"
);
break
;
case
RELUX
:
built_options
.
emplace
(
"-DUSE_RELUX"
);
break
;
case
TANH
:
built_options
.
emplace
(
"-DUSE_TANH"
);
break
;
case
SIGMOID
:
built_options
.
emplace
(
"-DUSE_SIGMOID"
);
break
;
default:
LOG
(
FATAL
)
<<
"Unknown activation type: "
<<
activation
;
}
MACE_RETURN_IF_ERROR
(
runtime
->
BuildKernel
(
"depthwise_deconv2d"
,
kernel_name
,
built_options
,
&
kernel_
));
kwg_size_
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel_
));
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
MACE_OUT_OF_RANGE_INIT
(
kernel_
);
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
MACE_OUT_OF_RANGE_SET_ARGS
(
kernel_
);
MACE_SET_3D_GWS_ARGS
(
kernel_
,
gws
);
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
kernel_
.
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
relux_max_limit
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
channels
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
stride_h
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
stride_w
));
kernel_
.
setArg
(
idx
++
,
stride_h_r
);
kernel_
.
setArg
(
idx
++
,
stride_w_r
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
align_h
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
align_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
padding_h
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
padding_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
filter
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
filter
->
dim
(
3
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
kernel_size
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
channel_blocks
));
input_shape_
=
input
->
shape
();
}
const
std
::
vector
<
uint32_t
>
lws
=
Default3DLocalWS
(
runtime
,
gws
,
kwg_size_
);
std
::
string
tuning_key
=
Concat
(
"depthwise_deconv2d_kernel_"
,
activation
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
));
MACE_RETURN_IF_ERROR
(
TuningOrRun3DKernel
(
runtime
,
kernel_
,
tuning_key
,
gws
,
lws
,
context
->
future
()));
MACE_OUT_OF_RANGE_VALIDATION
;
return
MaceStatus
::
MACE_SUCCESS
;
}
}
// namespace image
}
// namespace opencl
}
// namespace ops
}
// namespace mace
#endif // MACE_OPS_OPENCL_IMAGE_DEPTHWISE_DECONV2D_H_
mace/ops/ops_registry.cc
浏览文件 @
c057b7ec
...
@@ -32,6 +32,7 @@ extern void RegisterCrop(OpRegistryBase *op_registry);
...
@@ -32,6 +32,7 @@ extern void RegisterCrop(OpRegistryBase *op_registry);
extern
void
RegisterDeconv2D
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDeconv2D
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDepthToSpace
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDepthToSpace
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDepthwiseConv2d
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDepthwiseConv2d
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDepthwiseDeconv2d
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDequantize
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterDequantize
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterEltwise
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterEltwise
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterExpandDims
(
OpRegistryBase
*
op_registry
);
extern
void
RegisterExpandDims
(
OpRegistryBase
*
op_registry
);
...
@@ -89,6 +90,7 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
...
@@ -89,6 +90,7 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops
::
RegisterDeconv2D
(
this
);
ops
::
RegisterDeconv2D
(
this
);
ops
::
RegisterDepthToSpace
(
this
);
ops
::
RegisterDepthToSpace
(
this
);
ops
::
RegisterDepthwiseConv2d
(
this
);
ops
::
RegisterDepthwiseConv2d
(
this
);
ops
::
RegisterDepthwiseDeconv2d
(
this
);
ops
::
RegisterDequantize
(
this
);
ops
::
RegisterDequantize
(
this
);
ops
::
RegisterEltwise
(
this
);
ops
::
RegisterEltwise
(
this
);
ops
::
RegisterExpandDims
(
this
);
ops
::
RegisterExpandDims
(
this
);
...
...
mace/python/tools/converter_tool/base_converter.py
浏览文件 @
c057b7ec
...
@@ -91,6 +91,7 @@ MaceSupportedOps = [
...
@@ -91,6 +91,7 @@ MaceSupportedOps = [
'Deconv2D'
,
'Deconv2D'
,
'DepthToSpace'
,
'DepthToSpace'
,
'DepthwiseConv2d'
,
'DepthwiseConv2d'
,
'DepthwiseDeconv2d'
,
'Dequantize'
,
'Dequantize'
,
'Eltwise'
,
'Eltwise'
,
'ExpandDims'
,
'ExpandDims'
,
...
@@ -183,6 +184,7 @@ class MaceKeyword(object):
...
@@ -183,6 +184,7 @@ class MaceKeyword(object):
mace_scalar_input_index_str
=
'scalar_input_index'
mace_scalar_input_index_str
=
'scalar_input_index'
mace_opencl_mem_type
=
"opencl_mem_type"
mace_opencl_mem_type
=
"opencl_mem_type"
mace_framework_type_str
=
"framework_type"
mace_framework_type_str
=
"framework_type"
mace_group_str
=
"group"
class
TransformerRule
(
Enum
):
class
TransformerRule
(
Enum
):
...
...
mace/python/tools/converter_tool/caffe_converter.py
浏览文件 @
c057b7ec
...
@@ -411,17 +411,14 @@ class CaffeConverter(base_converter.ConverterInterface):
...
@@ -411,17 +411,14 @@ class CaffeConverter(base_converter.ConverterInterface):
def
convert_deconv2d
(
self
,
caffe_op
):
def
convert_deconv2d
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
op
=
self
.
convert_general_op
(
caffe_op
)
param
=
caffe_op
.
layer
.
convolution_param
param
=
caffe_op
.
layer
.
convolution_param
is_depthwise
=
False
if
param
.
HasField
(
caffe_group_str
)
and
param
.
group
>
1
:
filter_data
=
caffe_op
.
blobs
[
0
]
mace_check
(
param
.
group
==
filter_data
.
shape
[
0
]
and
filter_data
.
shape
[
1
]
==
1
,
"Mace does not support group deconvolution yet"
)
is_depthwise
=
True
mace_check
(
is_depthwise
is
False
,
"Mace do not support depthwise deconvolution yet"
)
op
.
type
=
MaceOp
.
Deconv2D
.
name
if
param
.
HasField
(
caffe_group_str
)
and
param
.
group
>
1
:
group_arg
=
op
.
arg
.
add
()
group_arg
.
name
=
MaceKeyword
.
mace_group_str
group_arg
.
i
=
param
.
group
op
.
type
=
MaceOp
.
DepthwiseDeconv2d
.
name
else
:
op
.
type
=
MaceOp
.
Deconv2D
.
name
self
.
add_stride_pad_kernel_arg
(
param
,
op
)
self
.
add_stride_pad_kernel_arg
(
param
,
op
)
# dilation is specific for convolution in caffe
# dilation is specific for convolution in caffe
...
...
mace/python/tools/converter_tool/shape_inference.py
浏览文件 @
c057b7ec
...
@@ -36,6 +36,7 @@ class ShapeInference(object):
...
@@ -36,6 +36,7 @@ class ShapeInference(object):
MaceOp
.
Conv2D
.
name
:
self
.
infer_shape_conv_pool_shape
,
MaceOp
.
Conv2D
.
name
:
self
.
infer_shape_conv_pool_shape
,
MaceOp
.
Deconv2D
.
name
:
self
.
infer_shape_deconv
,
MaceOp
.
Deconv2D
.
name
:
self
.
infer_shape_deconv
,
MaceOp
.
DepthwiseConv2d
.
name
:
self
.
infer_shape_conv_pool_shape
,
MaceOp
.
DepthwiseConv2d
.
name
:
self
.
infer_shape_conv_pool_shape
,
MaceOp
.
DepthwiseDeconv2d
.
name
:
self
.
infer_shape_deconv
,
MaceOp
.
Eltwise
.
name
:
self
.
infer_shape_general
,
MaceOp
.
Eltwise
.
name
:
self
.
infer_shape_general
,
MaceOp
.
BatchNorm
.
name
:
self
.
infer_shape_general
,
MaceOp
.
BatchNorm
.
name
:
self
.
infer_shape_general
,
MaceOp
.
AddN
.
name
:
self
.
infer_shape_general
,
MaceOp
.
AddN
.
name
:
self
.
infer_shape_general
,
...
@@ -159,11 +160,15 @@ class ShapeInference(object):
...
@@ -159,11 +160,15 @@ class ShapeInference(object):
dilations
=
[
1
,
1
]
dilations
=
[
1
,
1
]
round_func
=
math
.
floor
round_func
=
math
.
floor
group_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_group_str
)
output_shape
[
0
]
=
input_shape
[
0
]
output_shape
[
0
]
=
input_shape
[
0
]
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
\
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
\
and
ConverterUtil
.
filter_format
(
self
.
_net
)
==
FilterFormat
.
OIHW
:
# noqa
and
ConverterUtil
.
filter_format
(
self
.
_net
)
==
FilterFormat
.
OIHW
:
# noqa
# filter format: IOHW
# filter format: IOHW
output_shape
[
1
]
=
filter_shape
[
1
]
output_shape
[
1
]
=
filter_shape
[
1
]
if
group_arg
is
not
None
and
group_arg
.
i
>
1
:
output_shape
[
1
]
=
group_arg
.
i
*
filter_shape
[
1
]
output_shape
[
2
]
=
int
(
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
-
1
)
*
strides
[
0
]
+
round_func
((
input_shape
[
2
]
-
1
)
*
strides
[
0
]
+
(
filter_shape
[
2
]
-
1
)
*
(
dilations
[
0
]
-
1
)
+
(
filter_shape
[
2
]
-
1
)
*
(
dilations
[
0
]
-
1
)
+
...
...
mace/python/tools/converter_tool/transformer.py
浏览文件 @
c057b7ec
...
@@ -583,7 +583,7 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -583,7 +583,7 @@ class Transformer(base_converter.ConverterInterface):
def
fold_deconv_and_bn
(
self
):
def
fold_deconv_and_bn
(
self
):
net
=
self
.
_model
net
=
self
.
_model
for
op
in
net
.
op
:
for
op
in
net
.
op
:
if
(
op
.
type
==
MaceOp
.
Deconv2D
.
name
)
\
if
(
op
.
type
in
[
MaceOp
.
Deconv2D
.
name
,
MaceOp
.
DepthwiseDeconv2d
]
)
\
and
self
.
consumer_count
(
op
.
output
[
0
])
==
1
:
and
self
.
consumer_count
(
op
.
output
[
0
])
==
1
:
consumer_op
=
self
.
_consumers
[
op
.
output
[
0
]][
0
]
consumer_op
=
self
.
_consumers
[
op
.
output
[
0
]][
0
]
if
consumer_op
.
type
==
MaceOp
.
BatchNorm
.
name
:
if
consumer_op
.
type
==
MaceOp
.
BatchNorm
.
name
:
...
@@ -1365,7 +1365,8 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -1365,7 +1365,8 @@ class Transformer(base_converter.ConverterInterface):
self
.
set_filter_format
(
FilterFormat
.
OIHW
)
self
.
set_filter_format
(
FilterFormat
.
OIHW
)
# deconv's filter's output channel and input channel is reversed
# deconv's filter's output channel and input channel is reversed
for
op
in
net
.
op
:
for
op
in
net
.
op
:
if
op
.
type
==
MaceOp
.
Deconv2D
.
name
\
if
op
.
type
in
[
MaceOp
.
Deconv2D
.
name
,
MaceOp
.
DepthwiseDeconv2d
]
\
and
op
.
input
[
1
]
not
in
transposed_deconv_filter
:
and
op
.
input
[
1
]
not
in
transposed_deconv_filter
:
filter
=
self
.
_consts
[
op
.
input
[
1
]]
filter
=
self
.
_consts
[
op
.
input
[
1
]]
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
...
@@ -1427,11 +1428,17 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -1427,11 +1428,17 @@ class Transformer(base_converter.ConverterInterface):
self
.
buffer_transform
(
op
,
1
,
OpenCLBufferType
.
CONV2D_FILTER
)
self
.
buffer_transform
(
op
,
1
,
OpenCLBufferType
.
CONV2D_FILTER
)
if
len
(
op
.
input
)
>=
3
:
if
len
(
op
.
input
)
>=
3
:
self
.
buffer_transform
(
op
,
2
,
OpenCLBufferType
.
ARGUMENT
)
self
.
buffer_transform
(
op
,
2
,
OpenCLBufferType
.
ARGUMENT
)
elif
op
.
type
==
MaceOp
.
Deconv2D
.
name
:
elif
op
.
type
==
MaceOp
.
Deconv2D
.
name
\
self
.
buffer_transform
(
op
,
1
,
OpenCLBufferType
.
CONV2D_FILTER
)
or
op
.
type
==
MaceOp
.
DepthwiseDeconv2d
.
name
:
if
op
.
type
==
MaceOp
.
Deconv2D
.
name
:
self
.
buffer_transform
(
op
,
1
,
OpenCLBufferType
.
CONV2D_FILTER
)
elif
op
.
type
==
MaceOp
.
DepthwiseDeconv2d
.
name
:
self
.
buffer_transform
(
op
,
1
,
OpenCLBufferType
.
DW_CONV2D_FILTER
)
if
ConverterUtil
.
get_arg
(
if
ConverterUtil
.
get_arg
(
op
,
op
,
MaceKeyword
.
mace_framework_type_str
).
i
==
\
MaceKeyword
.
mace_framework_type_str
).
i
==
\
FrameworkType
.
CAFFE
.
value
:
FrameworkType
.
CAFFE
.
value
:
if
len
(
op
.
input
)
>=
3
:
if
len
(
op
.
input
)
>=
3
:
self
.
buffer_transform
(
op
,
2
,
OpenCLBufferType
.
ARGUMENT
)
self
.
buffer_transform
(
op
,
2
,
OpenCLBufferType
.
ARGUMENT
)
...
@@ -1456,8 +1463,10 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -1456,8 +1463,10 @@ class Transformer(base_converter.ConverterInterface):
if
len
(
op
.
input
)
>=
4
:
if
len
(
op
.
input
)
>=
4
:
self
.
buffer_transform
(
op
,
3
,
OpenCLBufferType
.
ARGUMENT
)
self
.
buffer_transform
(
op
,
3
,
OpenCLBufferType
.
ARGUMENT
)
elif
op
.
type
==
MaceOp
.
MatMul
.
name
and
\
elif
op
.
type
==
MaceOp
.
MatMul
.
name
and
\
ConverterUtil
.
get_arg
(
op
,
ConverterUtil
.
get_arg
(
MaceKeyword
.
mace_winograd_filter_transformed
)
is
not
None
:
# noqa
op
,
MaceKeyword
.
mace_winograd_filter_transformed
)
is
not
None
:
# noqa
self
.
buffer_transform
(
op
,
0
,
OpenCLBufferType
.
WINOGRAD_FILTER
)
self
.
buffer_transform
(
op
,
0
,
OpenCLBufferType
.
WINOGRAD_FILTER
)
elif
op
.
type
==
MaceOp
.
WinogradInverseTransform
.
name
\
elif
op
.
type
==
MaceOp
.
WinogradInverseTransform
.
name
\
and
len
(
op
.
input
)
>=
3
:
and
len
(
op
.
input
)
>=
3
:
...
@@ -1467,8 +1476,10 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -1467,8 +1476,10 @@ class Transformer(base_converter.ConverterInterface):
if
len
(
op
.
input
)
>=
3
:
if
len
(
op
.
input
)
>=
3
:
self
.
buffer_transform
(
op
,
2
,
OpenCLBufferType
.
ARGUMENT
)
self
.
buffer_transform
(
op
,
2
,
OpenCLBufferType
.
ARGUMENT
)
elif
op
.
type
==
MaceOp
.
Activation
.
name
:
elif
op
.
type
==
MaceOp
.
Activation
.
name
:
if
ConverterUtil
.
get_arg
(
op
,
if
ConverterUtil
.
get_arg
(
MaceKeyword
.
mace_activation_type_str
).
s
==
ActivationType
.
PRELU
.
name
:
# noqa
op
,
MaceKeyword
.
mace_activation_type_str
).
s
==
ActivationType
.
PRELU
.
name
:
# noqa
self
.
buffer_transform
(
op
,
1
,
OpenCLBufferType
.
ARGUMENT
)
self
.
buffer_transform
(
op
,
1
,
OpenCLBufferType
.
ARGUMENT
)
elif
op
.
type
==
MaceOp
.
LSTMCell
.
name
:
elif
op
.
type
==
MaceOp
.
LSTMCell
.
name
:
if
op
.
input
[
1
]
in
self
.
_consts
:
if
op
.
input
[
1
]
in
self
.
_consts
:
...
@@ -1793,24 +1804,24 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -1793,24 +1804,24 @@ class Transformer(base_converter.ConverterInterface):
check_conv
=
False
check_conv
=
False
check_deconv
=
False
check_deconv
=
False
if
ops
is
not
None
and
len
(
ops
)
==
1
:
if
ops
is
not
None
and
len
(
ops
)
==
1
:
check_conv
=
\
if
len
(
ops
[
0
].
input
)
>=
3
:
ops
[
0
].
type
in
[
MaceOp
.
Conv2D
.
name
,
check_conv
=
\
MaceOp
.
DepthwiseConv2d
.
name
,
ops
[
0
].
type
in
[
MaceOp
.
Conv2D
.
name
,
MaceOp
.
FullyConnected
.
name
]
\
MaceOp
.
DepthwiseConv2d
.
name
,
and
len
(
ops
[
0
].
input
)
>=
3
\
MaceOp
.
FullyConnected
.
name
]
\
and
ops
[
0
].
input
[
2
]
==
tensor
.
name
and
ops
[
0
].
input
[
2
]
==
tensor
.
name
# in tensorflow deconv's bias is the forth input
# in tensorflow deconv's bias is the forth input
if
ops
[
0
].
type
==
MaceOp
.
Deconv2D
.
name
:
if
ops
[
0
].
type
in
[
MaceOp
.
Deconv2D
.
name
,
MaceOp
.
DepthwiseDeconv2d
]:
from_caffe
=
ConverterUtil
.
get_arg
(
from_caffe
=
ConverterUtil
.
get_arg
(
ops
[
0
],
ops
[
0
],
MaceKeyword
.
mace_framework_type_str
).
i
==
\
MaceKeyword
.
mace_framework_type_str
).
i
==
\
FrameworkType
.
CAFFE
.
value
FrameworkType
.
CAFFE
.
value
if
from_caffe
:
if
from_caffe
and
len
(
ops
[
0
].
input
)
>=
3
:
check_deconv
=
len
(
ops
[
0
].
input
)
>=
3
\
check_deconv
=
ops
[
0
].
input
[
2
]
==
tensor
.
name
and
ops
[
0
].
input
[
2
]
==
tensor
.
name
else
:
else
:
check_deconv
=
len
(
ops
[
0
].
input
)
>=
4
\
if
len
(
ops
[
0
].
input
)
>=
4
:
and
ops
[
0
].
input
[
3
]
==
tensor
.
name
check_deconv
=
ops
[
0
].
input
[
3
]
==
tensor
.
name
if
check_conv
or
check_deconv
:
if
check_conv
or
check_deconv
:
if
self
.
_option
.
device
==
DeviceType
.
CPU
.
value
:
if
self
.
_option
.
device
==
DeviceType
.
CPU
.
value
:
conv_op
=
ops
[
0
]
conv_op
=
ops
[
0
]
...
...
repository/opencl-kernel/opencl_kernel_configure.bzl
浏览文件 @
c057b7ec
...
@@ -37,6 +37,7 @@ def _opencl_encrypt_kernel_impl(repository_ctx):
...
@@ -37,6 +37,7 @@ def _opencl_encrypt_kernel_impl(repository_ctx):
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/conv_2d_buffer.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/conv_2d_buffer.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/crop.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/crop.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/deconv_2d.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/deconv_2d.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/depthwise_deconv2d.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/depth_to_space.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/depth_to_space.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/depthwise_conv2d.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/depthwise_conv2d.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/depthwise_conv2d_buffer.cl"
))
unused_var
=
repository_ctx
.
path
(
Label
(
"//:mace/ops/opencl/cl/depthwise_conv2d_buffer.cl"
))
...
...
tools/validate.py
浏览文件 @
c057b7ec
...
@@ -18,8 +18,6 @@ import os
...
@@ -18,8 +18,6 @@ import os
import
os.path
import
os.path
import
numpy
as
np
import
numpy
as
np
import
re
import
re
from
scipy
import
spatial
from
scipy
import
stats
import
common
import
common
...
@@ -60,14 +58,22 @@ def calculate_sqnr(expected, actual):
...
@@ -60,14 +58,22 @@ def calculate_sqnr(expected, actual):
return
signal_power_sum
/
(
noise_power_sum
+
1e-15
)
return
signal_power_sum
/
(
noise_power_sum
+
1e-15
)
def
calculate_similarity
(
u
,
v
,
data_type
=
np
.
float64
):
if
u
.
dtype
is
not
data_type
:
u
=
u
.
astype
(
data_type
)
if
v
.
dtype
is
not
data_type
:
v
=
v
.
astype
(
data_type
)
return
np
.
dot
(
u
,
v
)
/
(
np
.
linalg
.
norm
(
u
)
*
np
.
linalg
.
norm
(
v
))
def
compare_output
(
platform
,
device_type
,
output_name
,
mace_out_value
,
def
compare_output
(
platform
,
device_type
,
output_name
,
mace_out_value
,
out_value
,
validation_threshold
):
out_value
,
validation_threshold
):
if
mace_out_value
.
size
!=
0
:
if
mace_out_value
.
size
!=
0
:
out_value
=
out_value
.
reshape
(
-
1
)
out_value
=
out_value
.
reshape
(
-
1
)
mace_out_value
=
mace_out_value
.
reshape
(
-
1
)
mace_out_value
=
mace_out_value
.
reshape
(
-
1
)
assert
len
(
out_value
)
==
len
(
mace_out_value
)
assert
len
(
out_value
)
==
len
(
mace_out_value
)
similarity
=
(
1
-
spatial
.
distance
.
cosine
(
out_value
,
mace_out_value
))
sqnr
=
calculate_sqnr
(
out_value
,
mace_out_value
)
sqnr
=
calculate_sqnr
(
out_value
,
mace_out_value
)
similarity
=
calculate_similarity
(
out_value
,
mace_out_value
)
common
.
MaceLogger
.
summary
(
common
.
MaceLogger
.
summary
(
output_name
+
' MACE VS '
+
platform
.
upper
()
output_name
+
' MACE VS '
+
platform
.
upper
()
+
' similarity: '
+
str
(
similarity
)
+
' , sqnr: '
+
str
(
sqnr
))
+
' similarity: '
+
str
(
similarity
)
+
' , sqnr: '
+
str
(
sqnr
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录