Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
16703420
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
16703420
编写于
3月 20, 2018
作者:
W
wuchenghui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix cpplint for mace/kernels
上级
564833a5
变更
41
隐藏空白更改
内联
并排
Showing
41 changed file
with
194 addition
and
164 deletion
+194
-164
mace/kernels/activation.h
mace/kernels/activation.h
+4
-0
mace/kernels/addn.h
mace/kernels/addn.h
+1
-2
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+2
-1
mace/kernels/bias_add.h
mace/kernels/bias_add.h
+3
-1
mace/kernels/buffer_to_image.h
mace/kernels/buffer_to_image.h
+6
-4
mace/kernels/channel_shuffle.h
mace/kernels/channel_shuffle.h
+4
-2
mace/kernels/concat.h
mace/kernels/concat.h
+6
-4
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+9
-9
mace/kernels/conv_pool_2d_util.cc
mace/kernels/conv_pool_2d_util.cc
+5
-3
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+2
-4
mace/kernels/eltwise.h
mace/kernels/eltwise.h
+3
-0
mace/kernels/fully_connected.h
mace/kernels/fully_connected.h
+2
-0
mace/kernels/matmul.h
mace/kernels/matmul.h
+0
-3
mace/kernels/neon/batch_norm_neon.cc
mace/kernels/neon/batch_norm_neon.cc
+1
-1
mace/kernels/neon/conv_2d_neon_1x1.cc
mace/kernels/neon/conv_2d_neon_1x1.cc
+4
-4
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+3
-3
mace/kernels/opencl/bias_add_opencl.cc
mace/kernels/opencl/bias_add_opencl.cc
+0
-1
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+1
-1
mace/kernels/opencl/channel_shuffle.cc
mace/kernels/opencl/channel_shuffle.cc
+2
-2
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+2
-3
mace/kernels/opencl/conv_2d_opencl.cc
mace/kernels/opencl/conv_2d_opencl.cc
+2
-1
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+0
-1
mace/kernels/opencl/depthwise_conv_opencl.cc
mace/kernels/opencl/depthwise_conv_opencl.cc
+13
-12
mace/kernels/opencl/eltwise_opencl.cc
mace/kernels/opencl/eltwise_opencl.cc
+0
-1
mace/kernels/opencl/fully_connected_opencl.cc
mace/kernels/opencl/fully_connected_opencl.cc
+24
-22
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+46
-40
mace/kernels/opencl/helper.h
mace/kernels/opencl/helper.h
+6
-5
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+2
-2
mace/kernels/opencl/pooling_opencl.cc
mace/kernels/opencl/pooling_opencl.cc
+7
-6
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+1
-3
mace/kernels/opencl/slice.cc
mace/kernels/opencl/slice.cc
+1
-1
mace/kernels/opencl/softmax_opencl.cc
mace/kernels/opencl/softmax_opencl.cc
+0
-1
mace/kernels/opencl/space_to_batch_opencl.cc
mace/kernels/opencl/space_to_batch_opencl.cc
+2
-2
mace/kernels/opencl/winograd_transform.cc
mace/kernels/opencl/winograd_transform.cc
+2
-4
mace/kernels/pooling.h
mace/kernels/pooling.h
+6
-3
mace/kernels/reshape.h
mace/kernels/reshape.h
+3
-1
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+3
-2
mace/kernels/slice.h
mace/kernels/slice.h
+3
-4
mace/kernels/softmax.h
mace/kernels/softmax.h
+6
-2
mace/kernels/space_to_batch.h
mace/kernels/space_to_batch.h
+5
-3
mace/kernels/winograd_transform.h
mace/kernels/winograd_transform.h
+2
-0
未找到文件。
mace/kernels/activation.h
浏览文件 @
16703420
...
@@ -5,6 +5,10 @@
...
@@ -5,6 +5,10 @@
#ifndef MACE_KERNELS_ACTIVATION_H_
#ifndef MACE_KERNELS_ACTIVATION_H_
#define MACE_KERNELS_ACTIVATION_H_
#define MACE_KERNELS_ACTIVATION_H_
#include <algorithm>
#include <string>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
...
mace/kernels/addn.h
浏览文件 @
16703420
...
@@ -8,6 +8,7 @@
...
@@ -8,6 +8,7 @@
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#include <arm_neon.h>
#include <arm_neon.h>
#endif
#endif
#include <algorithm>
#include <vector>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
...
@@ -17,9 +18,7 @@
...
@@ -17,9 +18,7 @@
namespace
mace
{
namespace
mace
{
namespace
kernels
{
namespace
kernels
{
namespace
{
constexpr
int
kCostPerGroup
=
1024
;
constexpr
int
kCostPerGroup
=
1024
;
}
// namespace
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
AddNFunctor
{
struct
AddNFunctor
{
...
...
mace/kernels/batch_norm.h
浏览文件 @
16703420
...
@@ -8,6 +8,7 @@
...
@@ -8,6 +8,7 @@
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#include <arm_neon.h>
#include <arm_neon.h>
#endif
#endif
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
...
@@ -159,7 +160,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
...
@@ -159,7 +160,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
std
::
vector
<
index_t
>
input_shape_
;
std
::
vector
<
index_t
>
input_shape_
;
};
};
}
// name
ps
ace kernels
}
// name
sp
ace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_BATCH_NORM_H_
#endif // MACE_KERNELS_BATCH_NORM_H_
mace/kernels/bias_add.h
浏览文件 @
16703420
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
#ifndef MACE_KERNELS_BIAS_ADD_H_
#ifndef MACE_KERNELS_BIAS_ADD_H_
#define MACE_KERNELS_BIAS_ADD_H_
#define MACE_KERNELS_BIAS_ADD_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -65,7 +67,7 @@ struct BiasAddFunctor<DeviceType::OPENCL, T> {
...
@@ -65,7 +67,7 @@ struct BiasAddFunctor<DeviceType::OPENCL, T> {
std
::
vector
<
index_t
>
input_shape_
;
std
::
vector
<
index_t
>
input_shape_
;
};
};
}
// name
ps
ace kernels
}
// name
sp
ace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_BIAS_ADD_H_
#endif // MACE_KERNELS_BIAS_ADD_H_
mace/kernels/buffer_to_image.h
浏览文件 @
16703420
...
@@ -13,13 +13,14 @@ namespace mace {
...
@@ -13,13 +13,14 @@ namespace mace {
namespace
kernels
{
namespace
kernels
{
struct
BufferToImageFunctorBase
{
struct
BufferToImageFunctorBase
{
BufferToImageFunctorBase
(
bool
i2b
)
:
i2b_
(
i2b
)
{}
explicit
BufferToImageFunctorBase
(
bool
i2b
)
:
i2b_
(
i2b
)
{}
bool
i2b_
;
bool
i2b_
;
};
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
BufferToImageFunctor
:
BufferToImageFunctorBase
{
struct
BufferToImageFunctor
:
BufferToImageFunctorBase
{
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
explicit
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
void
operator
()(
Tensor
*
input
,
void
operator
()(
Tensor
*
input
,
const
BufferType
type
,
const
BufferType
type
,
Tensor
*
output
,
Tensor
*
output
,
...
@@ -30,14 +31,15 @@ struct BufferToImageFunctor : BufferToImageFunctorBase {
...
@@ -30,14 +31,15 @@ struct BufferToImageFunctor : BufferToImageFunctorBase {
template
<
typename
T
>
template
<
typename
T
>
struct
BufferToImageFunctor
<
DeviceType
::
OPENCL
,
T
>
:
BufferToImageFunctorBase
{
struct
BufferToImageFunctor
<
DeviceType
::
OPENCL
,
T
>
:
BufferToImageFunctorBase
{
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
explicit
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
void
operator
()(
Tensor
*
input
,
void
operator
()(
Tensor
*
input
,
const
BufferType
type
,
const
BufferType
type
,
Tensor
*
output
,
Tensor
*
output
,
StatsFuture
*
future
);
StatsFuture
*
future
);
};
};
}
// name
ps
ace kernels
}
// name
sp
ace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_BUFFER_TO_IMAGE_H_
#endif // MACE_KERNELS_BUFFER_TO_IMAGE_H_
mace/kernels/channel_shuffle.h
浏览文件 @
16703420
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
#ifndef MACE_KERNELS_CHANNEL_SHUFFLE_H_
#ifndef MACE_KERNELS_CHANNEL_SHUFFLE_H_
#define MACE_KERNELS_CHANNEL_SHUFFLE_H_
#define MACE_KERNELS_CHANNEL_SHUFFLE_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -13,7 +15,7 @@ namespace kernels {
...
@@ -13,7 +15,7 @@ namespace kernels {
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ChannelShuffleFunctor
{
struct
ChannelShuffleFunctor
{
ChannelShuffleFunctor
(
const
int
groups
)
:
groups_
(
groups
)
{}
explicit
ChannelShuffleFunctor
(
const
int
groups
)
:
groups_
(
groups
)
{}
void
operator
()(
const
Tensor
*
input
,
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
Tensor
*
output
,
...
@@ -49,7 +51,7 @@ struct ChannelShuffleFunctor {
...
@@ -49,7 +51,7 @@ struct ChannelShuffleFunctor {
template
<
typename
T
>
template
<
typename
T
>
struct
ChannelShuffleFunctor
<
DeviceType
::
OPENCL
,
T
>
{
struct
ChannelShuffleFunctor
<
DeviceType
::
OPENCL
,
T
>
{
ChannelShuffleFunctor
(
const
int
groups
)
:
groups_
(
groups
)
{}
explicit
ChannelShuffleFunctor
(
const
int
groups
)
:
groups_
(
groups
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
...
...
mace/kernels/concat.h
浏览文件 @
16703420
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
#ifndef MACE_KERNELS_CONCAT_H_
#ifndef MACE_KERNELS_CONCAT_H_
#define MACE_KERNELS_CONCAT_H_
#define MACE_KERNELS_CONCAT_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -15,14 +17,14 @@ namespace mace {
...
@@ -15,14 +17,14 @@ namespace mace {
namespace
kernels
{
namespace
kernels
{
struct
ConcatFunctorBase
{
struct
ConcatFunctorBase
{
ConcatFunctorBase
(
const
int32_t
axis
)
:
axis_
(
axis
)
{}
explicit
ConcatFunctorBase
(
const
int32_t
axis
)
:
axis_
(
axis
)
{}
int32_t
axis_
;
int32_t
axis_
;
};
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ConcatFunctor
:
ConcatFunctorBase
{
struct
ConcatFunctor
:
ConcatFunctorBase
{
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
explicit
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
Tensor
*
output
,
...
@@ -77,7 +79,7 @@ struct ConcatFunctor : ConcatFunctorBase {
...
@@ -77,7 +79,7 @@ struct ConcatFunctor : ConcatFunctorBase {
template
<
typename
T
>
template
<
typename
T
>
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
T
>
:
ConcatFunctorBase
{
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
T
>
:
ConcatFunctorBase
{
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
explicit
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
Tensor
*
output
,
...
@@ -86,7 +88,7 @@ struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
...
@@ -86,7 +88,7 @@ struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
std
::
vector
<
index_t
>
input_shape_
;
std
::
vector
<
index_t
>
input_shape_
;
};
};
}
// name
ps
ace kernels
}
// name
sp
ace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_CONCAT_H_
#endif // MACE_KERNELS_CONCAT_H_
mace/kernels/conv_2d.h
浏览文件 @
16703420
...
@@ -8,6 +8,8 @@
...
@@ -8,6 +8,8 @@
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#include <arm_neon.h>
#include <arm_neon.h>
#endif
#endif
#include <algorithm>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
...
@@ -18,7 +20,6 @@
...
@@ -18,7 +20,6 @@
namespace
mace
{
namespace
mace
{
namespace
kernels
{
namespace
kernels
{
namespace
{
template
<
typename
T
,
template
<
typename
T
,
int
inc_tile_size
,
int
inc_tile_size
,
...
@@ -61,9 +62,9 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
...
@@ -61,9 +62,9 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
// AArch64 NEON has 32 128-bit general purpose registers
// AArch64 NEON has 32 128-bit general purpose registers
static_assert
(
inc_tile_size
==
4
,
"input channels tile size must be 4"
);
static_assert
(
inc_tile_size
==
4
,
"input channels tile size must be 4"
);
float32x4_t
in
[
h_count
*
w_count
];
float32x4_t
in
[
h_count
*
w_count
];
// NOLINT(runtime/arrays)
#else
#else
T
in
[
h_count
*
w_count
*
inc_tile_size
];
T
in
[
h_count
*
w_count
*
inc_tile_size
];
// NOLINT(runtime/arrays)
#endif
#endif
for
(
int
hi
=
0
;
hi
<
h_count
;
++
hi
)
{
for
(
int
hi
=
0
;
hi
<
h_count
;
++
hi
)
{
for
(
int
wi
=
0
;
wi
<
w_count
;
++
wi
)
{
for
(
int
wi
=
0
;
wi
<
w_count
;
++
wi
)
{
...
@@ -86,9 +87,9 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
...
@@ -86,9 +87,9 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
static_assert
(
inc_tile_size
==
4
,
"input channels tile size must be 4"
);
static_assert
(
inc_tile_size
==
4
,
"input channels tile size must be 4"
);
float32x4_t
weights
[
c_count
];
float32x4_t
weights
[
c_count
];
// NOLINT(runtime/arrays)
#else
#else
T
weights
[
c_count
*
inc_tile_size
];
T
weights
[
c_count
*
inc_tile_size
];
// NOLINT(runtime/arrays)
#endif
#endif
for
(
int
ci
=
0
;
ci
<
c_count
;
++
ci
)
{
for
(
int
ci
=
0
;
ci
<
c_count
;
++
ci
)
{
const
int
weights_idx
=
ci
;
const
int
weights_idx
=
ci
;
...
@@ -126,7 +127,7 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
...
@@ -126,7 +127,7 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
}
}
// handling the remaining input channels
// handling the remaining input channels
for
(;
inc
<
input_channels
;
++
inc
)
{
for
(;
inc
<
input_channels
;
++
inc
)
{
T
in
[
h_count
*
w_count
];
T
in
[
h_count
*
w_count
];
// NOLINT(runtime/arrays)
for
(
int
hi
=
0
;
hi
<
h_count
;
++
hi
)
{
for
(
int
hi
=
0
;
hi
<
h_count
;
++
hi
)
{
for
(
int
wi
=
0
;
wi
<
w_count
;
++
wi
)
{
for
(
int
wi
=
0
;
wi
<
w_count
;
++
wi
)
{
const
int
in_idx
=
hi
*
w_count
+
wi
;
const
int
in_idx
=
hi
*
w_count
+
wi
;
...
@@ -138,7 +139,7 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
...
@@ -138,7 +139,7 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
}
}
}
}
T
weights
[
c_count
];
T
weights
[
c_count
];
// NOLINT(runtime/arrays)
for
(
int
ci
=
0
;
ci
<
c_count
;
++
ci
)
{
for
(
int
ci
=
0
;
ci
<
c_count
;
++
ci
)
{
const
int
weights_idx
=
ci
;
const
int
weights_idx
=
ci
;
const
int
filter_offset
=
const
int
filter_offset
=
...
@@ -173,7 +174,6 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
...
@@ -173,7 +174,6 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
}
}
}
}
}
}
};
// namespace
struct
Conv2dFunctorBase
{
struct
Conv2dFunctorBase
{
Conv2dFunctorBase
(
const
int
*
strides
,
Conv2dFunctorBase
(
const
int
*
strides
,
...
@@ -331,7 +331,7 @@ struct Conv2dFunctor : Conv2dFunctorBase {
...
@@ -331,7 +331,7 @@ struct Conv2dFunctor : Conv2dFunctorBase {
auto
output_data
=
output
->
mutable_data
<
T
>
();
auto
output_data
=
output
->
mutable_data
<
T
>
();
constexpr
int
inc_tile_size
=
4
;
constexpr
int
inc_tile_size
=
4
;
// TODO Auto tuning these parameters
// TODO
(heliangliang)
Auto tuning these parameters
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
const
int
c_tile_size
=
4
;
const
int
c_tile_size
=
4
;
const
int
h_tile_size
=
2
;
const
int
h_tile_size
=
2
;
...
...
mace/kernels/conv_pool_2d_util.cc
浏览文件 @
16703420
...
@@ -4,6 +4,8 @@
...
@@ -4,6 +4,8 @@
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include <vector>
namespace
mace
{
namespace
mace
{
namespace
kernels
{
namespace
kernels
{
...
@@ -56,7 +58,7 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
...
@@ -56,7 +58,7 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
}
}
// Note: TensorFlow may padded one more on the right/bottom side
// Note: TensorFlow may padded one more on the right/bottom side
// TODO may be it's better to also truncate the left/top to
// TODO
(liuqi):
may be it's better to also truncate the left/top to
// utilize the more centered features. We need to benchmark
// utilize the more centered features. We need to benchmark
// based on the model accuracy.
// based on the model accuracy.
...
@@ -120,7 +122,7 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape, // NHWC
...
@@ -120,7 +122,7 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape, // NHWC
}
}
// Note: TensorFlow may padded one more on the right/bottom side
// Note: TensorFlow may padded one more on the right/bottom side
// TODO may be it's better to also truncate the left/top to
// TODO
(liuqi):
may be it's better to also truncate the left/top to
// utilize the more centered features. We need to benchmark
// utilize the more centered features. We need to benchmark
// based on the model accuracy.
// based on the model accuracy.
...
@@ -219,7 +221,7 @@ void CalPaddingSize(const index_t *input_shape, // NCHW
...
@@ -219,7 +221,7 @@ void CalPaddingSize(const index_t *input_shape, // NCHW
}
}
// Note: TensorFlow may padded one more on the right/bottom side
// Note: TensorFlow may padded one more on the right/bottom side
// TODO may be it's better to also truncate the left/top to
// TODO
(liuqi):
may be it's better to also truncate the left/top to
// utilize the more centered features. We need to benchmark
// utilize the more centered features. We need to benchmark
// based on the model accuracy.
// based on the model accuracy.
padding_size
[
0
]
=
std
::
max
<
int
>
(
padding_size
[
0
]
=
std
::
max
<
int
>
(
...
...
mace/kernels/depthwise_conv2d.h
浏览文件 @
16703420
...
@@ -8,6 +8,8 @@
...
@@ -8,6 +8,8 @@
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
#include <arm_neon.h>
#include <arm_neon.h>
#endif
#endif
#include <algorithm>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
...
@@ -17,8 +19,6 @@
...
@@ -17,8 +19,6 @@
namespace
mace
{
namespace
mace
{
namespace
kernels
{
namespace
kernels
{
namespace
{
template
<
typename
T
>
template
<
typename
T
>
void
DepthwiseConv2dKernel
(
const
T
*
input_ptr
,
void
DepthwiseConv2dKernel
(
const
T
*
input_ptr
,
const
T
*
filter_ptr
,
const
T
*
filter_ptr
,
...
@@ -233,8 +233,6 @@ void DepthwiseConv2dNoOOBCheckKernel(const T *input_ptr,
...
@@ -233,8 +233,6 @@ void DepthwiseConv2dNoOOBCheckKernel(const T *input_ptr,
}
}
}
}
}
// namespace
struct
DepthwiseConv2dFunctorBase
{
struct
DepthwiseConv2dFunctorBase
{
DepthwiseConv2dFunctorBase
(
const
int
*
strides
,
DepthwiseConv2dFunctorBase
(
const
int
*
strides
,
const
Padding
padding_type
,
const
Padding
padding_type
,
...
...
mace/kernels/eltwise.h
浏览文件 @
16703420
...
@@ -4,6 +4,9 @@
...
@@ -4,6 +4,9 @@
#ifndef MACE_KERNELS_ELTWISE_H_
#ifndef MACE_KERNELS_ELTWISE_H_
#define MACE_KERNELS_ELTWISE_H_
#define MACE_KERNELS_ELTWISE_H_
#include <algorithm>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
...
mace/kernels/fully_connected.h
浏览文件 @
16703420
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
#ifndef MACE_KERNELS_FULLY_CONNECTED_H_
#ifndef MACE_KERNELS_FULLY_CONNECTED_H_
#define MACE_KERNELS_FULLY_CONNECTED_H_
#define MACE_KERNELS_FULLY_CONNECTED_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
...
mace/kernels/matmul.h
浏览文件 @
16703420
...
@@ -21,7 +21,6 @@
...
@@ -21,7 +21,6 @@
namespace
mace
{
namespace
mace
{
namespace
kernels
{
namespace
kernels
{
namespace
{
template
<
typename
T
,
template
<
typename
T
,
int
register_tile_size
,
int
register_tile_size
,
int
h_count
,
int
h_count
,
...
@@ -87,7 +86,6 @@ inline void MatMulKernelFunc(const T *A,
...
@@ -87,7 +86,6 @@ inline void MatMulKernelFunc(const T *A,
}
}
}
}
}
}
}
// namespace
#define MACE_DO_MATMUL(HC, WC, KC) \
#define MACE_DO_MATMUL(HC, WC, KC) \
MatMulKernelFunc<T, register_tile_size, HC, WC, KC>(a_ptr_batch_base, \
MatMulKernelFunc<T, register_tile_size, HC, WC, KC>(a_ptr_batch_base, \
...
@@ -118,7 +116,6 @@ switch (k_count) { \
...
@@ -118,7 +116,6 @@ switch (k_count) { \
LOG(FATAL) << "Unsupported k tile: " << k_count; \
LOG(FATAL) << "Unsupported k tile: " << k_count; \
}
}
#define MACE_CASE_W_MATMUL(HC) \
#define MACE_CASE_W_MATMUL(HC) \
switch (w_count) { \
switch (w_count) { \
case 1: \
case 1: \
...
...
mace/kernels/neon/batch_norm_neon.cc
浏览文件 @
16703420
...
@@ -78,7 +78,7 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
...
@@ -78,7 +78,7 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
}
}
}
}
}
}
}
;
}
}
// namespace kernels
}
// namespace kernels
}
// namespace mace
}
// namespace mace
mace/kernels/neon/conv_2d_neon_1x1.cc
浏览文件 @
16703420
...
@@ -296,7 +296,7 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW
...
@@ -296,7 +296,7 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW
}
}
}
}
}
}
}
;
}
void
Conv2dNeonPixelK1x1S1
(
void
Conv2dNeonPixelK1x1S1
(
const
float
*
input
,
// NCHW
const
float
*
input
,
// NCHW
...
@@ -321,7 +321,7 @@ void Conv2dNeonPixelK1x1S1(
...
@@ -321,7 +321,7 @@ void Conv2dNeonPixelK1x1S1(
const
index_t
total_pixels
=
height
*
width
;
const
index_t
total_pixels
=
height
*
width
;
// Process 4 * 2 = 8 pixels for each innermost loop
// Process 4 * 2 = 8 pixels for each innermost loop
// TODO Does 64 bit v.s. 32 bit index matters? need benchmark
// TODO
(heliangliang):
Does 64 bit v.s. 32 bit index matters? need benchmark
const
index_t
total_loops
=
total_pixels
>>
3
;
const
index_t
total_loops
=
total_pixels
>>
3
;
const
index_t
loop_remaining
=
total_pixels
&
7
;
const
index_t
loop_remaining
=
total_pixels
&
7
;
...
@@ -329,7 +329,7 @@ void Conv2dNeonPixelK1x1S1(
...
@@ -329,7 +329,7 @@ void Conv2dNeonPixelK1x1S1(
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
const
float
*
filter_ptr
=
filter
+
c
*
input_channels
;
const
float
*
filter_ptr
=
filter
+
c
*
input_channels
;
// TODO Will GCC opt these out?
// TODO
(heliangliang):
Will GCC opt these out?
float
*
channel_output_start
=
float
*
channel_output_start
=
output
+
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
output
+
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
const
float
*
input_ptr
=
const
float
*
input_ptr
=
...
@@ -469,7 +469,7 @@ void Conv2dNeonPixelK1x1S1(
...
@@ -469,7 +469,7 @@ void Conv2dNeonPixelK1x1S1(
}
}
}
}
}
}
}
;
}
}
// namespace kernels
}
// namespace kernels
}
// namespace mace
}
// namespace mace
mace/kernels/opencl/addn.cc
浏览文件 @
16703420
...
@@ -45,7 +45,6 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -45,7 +45,6 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
MakeString
(
"-DINPUT_NUM="
,
input_tensors
.
size
()));
built_options
.
emplace
(
MakeString
(
"-DINPUT_NUM="
,
input_tensors
.
size
()));
kernel_
=
runtime
->
BuildKernel
(
"addn"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"addn"
,
kernel_name
,
built_options
);
}
}
std
::
vector
<
index_t
>
output_shape
=
input_tensors
[
0
]
->
shape
();
std
::
vector
<
index_t
>
output_shape
=
input_tensors
[
0
]
->
shape
();
...
@@ -56,7 +55,8 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -56,7 +55,8 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
if
(
!
IsVecEqual
(
input_shape_
,
input_tensors
[
0
]
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input_tensors
[
0
]
->
shape
()))
{
std
::
vector
<
size_t
>
output_image_shape
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
@@ -75,7 +75,7 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -75,7 +75,7 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
ss
<<
"addn_opencl_kernel_"
<<
output_shape
[
0
]
<<
"_"
<<
output_shape
[
1
]
ss
<<
"addn_opencl_kernel_"
<<
output_shape
[
0
]
<<
"_"
<<
output_shape
[
1
]
<<
"_"
<<
output_shape
[
2
]
<<
"_"
<<
output_shape
[
3
];
<<
"_"
<<
output_shape
[
2
]
<<
"_"
<<
output_shape
[
3
];
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
;
}
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
float
>;
...
...
mace/kernels/opencl/bias_add_opencl.cc
浏览文件 @
16703420
...
@@ -32,7 +32,6 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
...
@@ -32,7 +32,6 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"bias_add"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"bias_add"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
...
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
16703420
...
@@ -14,7 +14,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -14,7 +14,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
Tensor
*
buffer
,
const
BufferType
type
,
Tensor
*
image
,
StatsFuture
*
future
)
{
Tensor
*
buffer
,
const
BufferType
type
,
Tensor
*
image
,
StatsFuture
*
future
)
{
std
::
vector
<
size_t
>
image_shape
;
std
::
vector
<
size_t
>
image_shape
;
if
(
!
i2b_
)
{
if
(
!
i2b_
)
{
CalImage2DShape
(
buffer
->
shape
(),
type
,
image_shape
);
CalImage2DShape
(
buffer
->
shape
(),
type
,
&
image_shape
);
if
(
type
==
WINOGRAD_FILTER
)
{
if
(
type
==
WINOGRAD_FILTER
)
{
std
::
vector
<
index_t
>
new_shape
=
CalWinogradShape
(
buffer
->
shape
(),
type
);
std
::
vector
<
index_t
>
new_shape
=
CalWinogradShape
(
buffer
->
shape
(),
type
);
image
->
ResizeImage
(
new_shape
,
image_shape
);
image
->
ResizeImage
(
new_shape
,
image_shape
);
...
...
mace/kernels/opencl/channel_shuffle.cc
浏览文件 @
16703420
...
@@ -39,7 +39,8 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -39,7 +39,8 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"channel_shuffle"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"channel_shuffle"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
@@ -61,7 +62,6 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -61,7 +62,6 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
<<
output
->
dim
(
3
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
}
template
template
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
16703420
...
@@ -41,7 +41,6 @@ static void Concat2(cl::Kernel *kernel,
...
@@ -41,7 +41,6 @@ static void Concat2(cl::Kernel *kernel,
built_options
.
emplace
(
"-DDIVISIBLE_FOUR"
);
built_options
.
emplace
(
"-DDIVISIBLE_FOUR"
);
}
}
*
kernel
=
runtime
->
BuildKernel
(
"concat"
,
kernel_name
,
built_options
);
*
kernel
=
runtime
->
BuildKernel
(
"concat"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input0
->
shape
()))
{
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
@@ -140,7 +139,7 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -140,7 +139,7 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
inputs_count
==
2
||
divisible_four
,
inputs_count
==
2
||
divisible_four
,
"Dimensions of inputs should be divisible by 4 when inputs_count > 2."
);
"Dimensions of inputs should be divisible by 4 when inputs_count > 2."
);
std
::
vector
<
size_t
>
image_shape
;
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
image_shape
);
output
->
ResizeImage
(
output_shape
,
image_shape
);
output
->
ResizeImage
(
output_shape
,
image_shape
);
switch
(
inputs_count
)
{
switch
(
inputs_count
)
{
...
@@ -155,7 +154,7 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -155,7 +154,7 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
MACE_NOT_IMPLEMENTED
;
MACE_NOT_IMPLEMENTED
;
}
}
}
}
}
;
}
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
half
>;
...
...
mace/kernels/opencl/conv_2d_opencl.cc
浏览文件 @
16703420
...
@@ -92,7 +92,8 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
...
@@ -92,7 +92,8 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
}
std
::
vector
<
size_t
>
output_image_shape
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
if
(
kernel_h
==
kernel_w
&&
kernel_h
<=
5
&&
if
(
kernel_h
==
kernel_w
&&
kernel_h
<=
5
&&
...
...
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
16703420
...
@@ -68,7 +68,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
...
@@ -68,7 +68,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_1x1"
,
kernel_name
,
built_options
);
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_1x1"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
...
mace/kernels/opencl/depthwise_conv_opencl.cc
浏览文件 @
16703420
...
@@ -91,18 +91,18 @@ void DepthwiseConv2d(cl::Kernel *kernel,
...
@@ -91,18 +91,18 @@ void DepthwiseConv2d(cl::Kernel *kernel,
}
}
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
input_height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
input_height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
input_width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
input_width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
input_channel_blocks
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
input_channel_blocks
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
filter_height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
filter_height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
filter_width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
filter_width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
paddings
[
0
]
/
2
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
paddings
[
0
]
/
2
));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
paddings
[
1
]
/
2
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
paddings
[
1
]
/
2
));
if
(
stride
!=
1
||
dilations
[
0
]
!=
1
||
dilations
[
1
]
!=
1
)
{
if
(
stride
!=
1
||
dilations
[
0
]
!=
1
||
dilations
[
1
]
!=
1
)
{
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
dilations
[
0
]));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
dilations
[
0
]));
kernel
->
setArg
(
idx
++
,
static_cast
<
shor
t
>
(
dilations
[
1
]));
kernel
->
setArg
(
idx
++
,
static_cast
<
int16_
t
>
(
dilations
[
1
]));
}
}
*
prev_input_shape
=
input
->
shape
();
*
prev_input_shape
=
input
->
shape
();
}
}
...
@@ -159,7 +159,8 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -159,7 +159,8 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
}
}
std
::
vector
<
size_t
>
output_image_shape
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
DepthwiseConv2d
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
DepthwiseConv2d
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
...
...
mace/kernels/opencl/eltwise_opencl.cc
浏览文件 @
16703420
...
@@ -35,7 +35,6 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
...
@@ -35,7 +35,6 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
built_options
.
emplace
(
MakeString
(
"-DELTWISE_TYPE="
,
type_
));
built_options
.
emplace
(
MakeString
(
"-DELTWISE_TYPE="
,
type_
));
if
(
!
coeff_
.
empty
())
built_options
.
emplace
(
"-DCOEFF_SUM"
);
if
(
!
coeff_
.
empty
())
built_options
.
emplace
(
"-DCOEFF_SUM"
);
kernel_
=
runtime
->
BuildKernel
(
"eltwise"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"eltwise"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
input_shape_
,
input0
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
...
mace/kernels/opencl/fully_connected_opencl.cc
浏览文件 @
16703420
...
@@ -16,12 +16,14 @@ void FCWXKernel(cl::Kernel *kernel,
...
@@ -16,12 +16,14 @@ void FCWXKernel(cl::Kernel *kernel,
std
::
vector
<
index_t
>
*
prev_input_shape
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
Tensor
*
output
,
const
ActivationType
activation
,
const
ActivationType
activation
,
std
::
vector
<
uint32_t
>
&
gws
,
std
::
vector
<
uint32_t
>
*
gws
,
std
::
vector
<
uint32_t
>
&
lws
,
std
::
vector
<
uint32_t
>
*
lws
,
const
float
relux_max_limit
,
const
float
relux_max_limit
,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
MACE_CHECK
(
input
->
dim
(
3
)
%
4
==
0
)
MACE_CHECK
(
input
->
dim
(
3
)
%
4
==
0
)
<<
"FC width kernel only support input with 4x channel."
;
<<
"FC width kernel only support input with 4x channel."
;
MACE_CHECK_NOTNULL
(
gws
);
MACE_CHECK_NOTNULL
(
lws
);
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
runtime
=
OpenCLRuntime
::
Global
();
if
(
kernel
->
get
()
==
nullptr
)
{
if
(
kernel
->
get
()
==
nullptr
)
{
...
@@ -62,12 +64,11 @@ void FCWXKernel(cl::Kernel *kernel,
...
@@ -62,12 +64,11 @@ void FCWXKernel(cl::Kernel *kernel,
const
index_t
output_blocks
=
RoundUpDiv4
(
output_size
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output_size
);
const
uint32_t
wave_size
=
runtime
->
GetKernelWaveSize
(
*
kernel
);
const
uint32_t
wave_size
=
runtime
->
GetKernelWaveSize
(
*
kernel
);
gws
=
{
4
,
(
wave_size
/
4
),
static_cast
<
uint32_t
>
(
batch
*
output_blocks
)};
*
gws
=
{
4
,
(
wave_size
/
4
),
static_cast
<
uint32_t
>
(
batch
*
output_blocks
)};
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
*
kernel
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
*
kernel
);
const
uint32_t
inter_local_blks
=
kwg_size
/
(
gws
[
0
]
*
gws
[
1
]);
const
uint32_t
inter_local_blks
=
kwg_size
/
((
*
gws
)[
0
]
*
(
*
gws
)[
1
]);
lws
=
{
gws
[
0
],
gws
[
1
],
inter_local_blks
};
*
lws
=
{(
*
gws
)[
0
],
(
*
gws
)[
1
],
inter_local_blks
};
}
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
batch
=
output
->
dim
(
0
);
...
@@ -80,21 +81,22 @@ void FCWXKernel(cl::Kernel *kernel,
...
@@ -80,21 +81,22 @@ void FCWXKernel(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
}
}
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
(
lws
[
0
]
*
lws
[
1
]
*
lws
[
2
]
*
sizeof
(
float
)),
nullptr
);
kernel
->
setArg
(
idx
++
,
((
*
lws
)[
0
]
*
(
*
lws
)[
1
]
*
(
*
lws
)[
2
]
*
sizeof
(
float
)),
nullptr
);
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
1
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
1
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
2
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
2
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
input
->
dim
(
3
))));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
input
->
dim
(
3
))));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
output_blocks
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
output_blocks
));
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
gws
[
2
]
=
static_cast
<
uint32_t
>
(
batch
*
output_blocks
);
(
*
gws
)
[
2
]
=
static_cast
<
uint32_t
>
(
batch
*
output_blocks
);
*
prev_input_shape
=
input
->
shape
();
*
prev_input_shape
=
input
->
shape
();
}
}
cl
::
Event
event
;
cl
::
Event
event
;
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
*
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
*
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
(
*
gws
)[
0
],
(
*
gws
)[
1
],
(
*
gws
)
[
2
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]),
nullptr
,
&
event
);
cl
::
NDRange
(
(
*
lws
)[
0
],
(
*
lws
)[
1
],
(
*
lws
)
[
2
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
if
(
future
!=
nullptr
)
{
if
(
future
!=
nullptr
)
{
...
@@ -105,7 +107,6 @@ void FCWXKernel(cl::Kernel *kernel,
...
@@ -105,7 +107,6 @@ void FCWXKernel(cl::Kernel *kernel,
}
}
};
};
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -116,10 +117,12 @@ void FCWTXKernel(cl::Kernel *kernel,
...
@@ -116,10 +117,12 @@ void FCWTXKernel(cl::Kernel *kernel,
std
::
vector
<
index_t
>
*
prev_input_shape
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
Tensor
*
output
,
const
ActivationType
activation
,
const
ActivationType
activation
,
std
::
vector
<
uint32_t
>
&
gws
,
std
::
vector
<
uint32_t
>
*
gws
,
std
::
vector
<
uint32_t
>
&
lws
,
std
::
vector
<
uint32_t
>
*
lws
,
const
float
relux_max_limit
,
const
float
relux_max_limit
,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
MACE_CHECK_NOTNULL
(
gws
);
MACE_CHECK_NOTNULL
(
lws
);
if
(
kernel
->
get
()
==
nullptr
)
{
if
(
kernel
->
get
()
==
nullptr
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
set
<
std
::
string
>
built_options
;
...
@@ -152,7 +155,7 @@ void FCWTXKernel(cl::Kernel *kernel,
...
@@ -152,7 +155,7 @@ void FCWTXKernel(cl::Kernel *kernel,
*
kernel
=
*
kernel
=
runtime
->
BuildKernel
(
"fully_connected"
,
kernel_name
,
built_options
);
runtime
->
BuildKernel
(
"fully_connected"
,
kernel_name
,
built_options
);
lws
=
{
16
,
64
,
1
};
*
lws
=
{
16
,
64
,
1
};
}
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
@@ -171,18 +174,16 @@ void FCWTXKernel(cl::Kernel *kernel,
...
@@ -171,18 +174,16 @@ void FCWTXKernel(cl::Kernel *kernel,
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output
->
dim
(
3
));
const
index_t
output_blocks
=
RoundUpDiv4
(
output
->
dim
(
3
));
gws
=
{
*
gws
=
{
static_cast
<
uint32_t
>
(
batch
),
static_cast
<
uint32_t
>
(
output_blocks
),
static_cast
<
uint32_t
>
(
batch
),
static_cast
<
uint32_t
>
(
output_blocks
),
};
};
*
prev_input_shape
=
input
->
shape
();
*
prev_input_shape
=
input
->
shape
();
}
}
std
::
stringstream
ss
;
std
::
stringstream
ss
;
ss
<<
"fc_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
ss
<<
"fc_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun2DKernel
(
*
kernel
,
ss
.
str
(),
gws
.
data
(),
lws
,
future
);
TuningOrRun2DKernel
(
*
kernel
,
ss
.
str
(),
gws
->
data
(),
*
lws
,
future
);
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -194,17 +195,18 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -194,17 +195,18 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
=
{
input
->
dim
(
0
),
1
,
1
,
weight
->
dim
(
0
)};
std
::
vector
<
index_t
>
output_shape
=
{
input
->
dim
(
0
),
1
,
1
,
weight
->
dim
(
0
)};
std
::
vector
<
size_t
>
output_image_shape
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
if
(
weight_type_
==
BufferType
::
WEIGHT_HEIGHT
)
{
if
(
weight_type_
==
BufferType
::
WEIGHT_HEIGHT
)
{
FCWTXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
FCWTXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
activation_
,
&
gws_
,
&
lws_
,
relux_max_limit_
,
future
);
}
else
{
}
else
{
FCWXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
FCWXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
activation_
,
&
gws_
,
&
lws_
,
relux_max_limit_
,
future
);
}
}
}
;
}
template
struct
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
float
>;
...
...
mace/kernels/opencl/helper.cc
浏览文件 @
16703420
...
@@ -3,6 +3,11 @@
...
@@ -3,6 +3,11 @@
//
//
#include "mace/kernels/opencl/helper.h"
#include "mace/kernels/opencl/helper.h"
#include <algorithm>
#include <string>
#include <vector>
#include "mace/utils/tuner.h"
#include "mace/utils/tuner.h"
#include "mace/utils/utils.h"
#include "mace/utils/utils.h"
...
@@ -11,91 +16,92 @@ namespace kernels {
...
@@ -11,91 +16,92 @@ namespace kernels {
// [(C + 3) / 4 * W, N * H]
// [(C + 3) / 4 * W, N * H]
void
CalInOutputImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
void
CalInOutputImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
RoundUpDiv4
(
shape
[
3
])
*
shape
[
2
];
(
*
image_shape
)
[
0
]
=
RoundUpDiv4
(
shape
[
3
])
*
shape
[
2
];
image_shape
[
1
]
=
shape
[
0
]
*
shape
[
1
];
(
*
image_shape
)
[
1
]
=
shape
[
0
]
*
shape
[
1
];
}
}
// [RoundUp<4>(Ic) * H * W, (Oc + 3) / 4]
// [RoundUp<4>(Ic) * H * W, (Oc + 3) / 4]
void
CalConv2dFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HWOI */
void
CalConv2dFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HWOI */
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
shape
[
0
]
*
shape
[
1
]
*
RoundUp
<
index_t
>
(
shape
[
3
],
4
);
(
*
image_shape
)
[
0
]
=
shape
[
0
]
*
shape
[
1
]
*
RoundUp
<
index_t
>
(
shape
[
3
],
4
);
image_shape
[
1
]
=
RoundUpDiv4
(
shape
[
2
]);
(
*
image_shape
)
[
1
]
=
RoundUpDiv4
(
shape
[
2
]);
}
}
// [H * W * M, (Ic + 3) / 4]
// [H * W * M, (Ic + 3) / 4]
void
CalDepthwiseConv2dFilterImageShape
(
void
CalDepthwiseConv2dFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HWIM */
const
std
::
vector
<
index_t
>
&
shape
,
/* HWIM */
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
shape
[
0
]
*
shape
[
1
]
*
shape
[
3
];
(
*
image_shape
)
[
0
]
=
shape
[
0
]
*
shape
[
1
]
*
shape
[
3
];
image_shape
[
1
]
=
RoundUpDiv4
(
shape
[
2
]);
(
*
image_shape
)
[
1
]
=
RoundUpDiv4
(
shape
[
2
]);
}
}
// [(size + 3) / 4, 1]
// [(size + 3) / 4, 1]
void
CalArgImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
void
CalArgImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
1
);
MACE_CHECK
(
shape
.
size
()
==
1
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
RoundUpDiv4
(
shape
[
0
]);
(
*
image_shape
)
[
0
]
=
RoundUpDiv4
(
shape
[
0
]);
image_shape
[
1
]
=
1
;
(
*
image_shape
)
[
1
]
=
1
;
}
}
// Only support 3x3 now
// Only support 3x3 now
// [ (Ic + 3) / 4, 16 * Oc]
// [ (Ic + 3) / 4, 16 * Oc]
void
CalWinogradFilterImageShape
(
void
CalWinogradFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* Oc, Ic, H, W*/
const
std
::
vector
<
index_t
>
&
shape
,
/* Oc, Ic, H, W*/
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
RoundUpDiv4
(
shape
[
1
]);
(
*
image_shape
)
[
0
]
=
RoundUpDiv4
(
shape
[
1
]);
image_shape
[
1
]
=
(
shape
[
0
]
<<
4
);
(
*
image_shape
)
[
1
]
=
(
shape
[
0
]
<<
4
);
}
}
// [W * C, N * RoundUp<4>(H)]
// [W * C, N * RoundUp<4>(H)]
void
CalInOutHeightImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
void
CalInOutHeightImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
shape
[
2
]
*
shape
[
3
];
(
*
image_shape
)
[
0
]
=
shape
[
2
]
*
shape
[
3
];
image_shape
[
1
]
=
shape
[
0
]
*
RoundUpDiv4
(
shape
[
1
]);
(
*
image_shape
)
[
1
]
=
shape
[
0
]
*
RoundUpDiv4
(
shape
[
1
]);
}
}
// [RoundUp<4>(W) * C, N * H]
// [RoundUp<4>(W) * C, N * H]
void
CalInOutWidthImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
void
CalInOutWidthImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
RoundUpDiv4
(
shape
[
2
])
*
shape
[
3
];
(
*
image_shape
)
[
0
]
=
RoundUpDiv4
(
shape
[
2
])
*
shape
[
3
];
image_shape
[
1
]
=
shape
[
0
]
*
shape
[
1
];
(
*
image_shape
)
[
1
]
=
shape
[
0
]
*
shape
[
1
];
}
}
// [W, (H + 3) / 4]
// [W, (H + 3) / 4]
void
CalWeightHeightImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HW */
void
CalWeightHeightImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HW */
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
2
);
MACE_CHECK
(
shape
.
size
()
==
2
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
shape
[
1
];
(
*
image_shape
)
[
0
]
=
shape
[
1
];
image_shape
[
1
]
=
RoundUpDiv4
(
shape
[
0
]);
(
*
image_shape
)
[
1
]
=
RoundUpDiv4
(
shape
[
0
]);
}
}
// [(W + 3) / 4, H]
// [(W + 3) / 4, H]
void
CalWeightWidthImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HW */
void
CalWeightWidthImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HW */
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
2
);
MACE_CHECK
(
shape
.
size
()
==
2
);
image_shape
.
resize
(
2
);
image_shape
->
resize
(
2
);
image_shape
[
0
]
=
RoundUpDiv4
(
shape
[
1
]);
(
*
image_shape
)
[
0
]
=
RoundUpDiv4
(
shape
[
1
]);
image_shape
[
1
]
=
shape
[
0
];
(
*
image_shape
)
[
1
]
=
shape
[
0
];
}
}
void
CalImage2DShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
void
CalImage2DShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
const
BufferType
type
,
const
BufferType
type
,
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
vector
<
size_t
>
*
image_shape
)
{
MACE_CHECK_NOTNULL
(
image_shape
);
switch
(
type
)
{
switch
(
type
)
{
case
CONV2D_FILTER
:
case
CONV2D_FILTER
:
CalConv2dFilterImageShape
(
shape
,
image_shape
);
CalConv2dFilterImageShape
(
shape
,
image_shape
);
...
@@ -188,7 +194,7 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) {
...
@@ -188,7 +194,7 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) {
}
}
}
}
void
TuningOrRun3DKernel
(
cl
::
Kernel
&
kernel
,
void
TuningOrRun3DKernel
(
c
onst
c
l
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
const
uint32_t
*
gws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
...
@@ -202,7 +208,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
...
@@ -202,7 +208,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
local_ws
[
2
]
=
local_ws
[
2
]
=
std
::
min
<
uint32_t
>
(
gws
[
2
],
kwg_size
/
(
local_ws
[
0
]
*
local_ws
[
1
]));
std
::
min
<
uint32_t
>
(
gws
[
2
],
kwg_size
/
(
local_ws
[
0
]
*
local_ws
[
1
]));
return
{
return
{
// TODO tuning these magic numbers
// TODO
(heliangliang):
tuning these magic numbers
{
local_ws
[
0
],
local_ws
[
1
],
local_ws
[
2
],
1
},
{
local_ws
[
0
],
local_ws
[
1
],
local_ws
[
2
],
1
},
{
kwg_size
/
16
,
4
,
4
,
1
},
{
kwg_size
/
16
,
4
,
4
,
1
},
{
kwg_size
/
32
,
4
,
8
,
1
},
{
kwg_size
/
32
,
4
,
8
,
1
},
...
@@ -291,7 +297,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
...
@@ -291,7 +297,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
}
}
}
}
void
TuningOrRun2DKernel
(
cl
::
Kernel
&
kernel
,
void
TuningOrRun2DKernel
(
c
onst
c
l
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
const
uint32_t
*
gws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
...
...
mace/kernels/opencl/helper.h
浏览文件 @
16703420
...
@@ -5,6 +5,9 @@
...
@@ -5,6 +5,9 @@
#ifndef MACE_KERNELS_OPENCL_HELPER_H_
#ifndef MACE_KERNELS_OPENCL_HELPER_H_
#define MACE_KERNELS_OPENCL_HELPER_H_
#define MACE_KERNELS_OPENCL_HELPER_H_
#include <string>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
...
@@ -30,7 +33,7 @@ enum BufferType {
...
@@ -30,7 +33,7 @@ enum BufferType {
void
CalImage2DShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
void
CalImage2DShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* NHWC */
const
BufferType
type
,
const
BufferType
type
,
std
::
vector
<
size_t
>
&
image_shape
);
std
::
vector
<
size_t
>
*
image_shape
);
std
::
vector
<
index_t
>
CalWinogradShape
(
const
std
::
vector
<
index_t
>
&
shape
,
std
::
vector
<
index_t
>
CalWinogradShape
(
const
std
::
vector
<
index_t
>
&
shape
,
const
BufferType
type
);
const
BufferType
type
);
...
@@ -43,13 +46,13 @@ std::string DtToCLDt(const DataType dt);
...
@@ -43,13 +46,13 @@ std::string DtToCLDt(const DataType dt);
std
::
string
DtToUpstreamCLDt
(
const
DataType
dt
);
std
::
string
DtToUpstreamCLDt
(
const
DataType
dt
);
void
TuningOrRun3DKernel
(
cl
::
Kernel
&
kernel
,
void
TuningOrRun3DKernel
(
c
onst
c
l
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
const
uint32_t
*
gws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
StatsFuture
*
future
);
StatsFuture
*
future
);
void
TuningOrRun2DKernel
(
cl
::
Kernel
&
kernel
,
void
TuningOrRun2DKernel
(
c
onst
c
l
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
const
uint32_t
*
gws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
...
@@ -78,7 +81,6 @@ bool IsVecEqual(const std::vector<T> &input0,
...
@@ -78,7 +81,6 @@ bool IsVecEqual(const std::vector<T> &input0,
(
std
::
equal
(
input0
.
begin
(),
input0
.
end
(),
input1
.
begin
())));
(
std
::
equal
(
input0
.
begin
(),
input0
.
end
(),
input1
.
begin
())));
}
}
namespace
{
template
<
typename
T
>
template
<
typename
T
>
void
AppendToStream
(
std
::
stringstream
*
ss
,
const
std
::
string
&
delimiter
,
T
v
)
{
void
AppendToStream
(
std
::
stringstream
*
ss
,
const
std
::
string
&
delimiter
,
T
v
)
{
(
*
ss
)
<<
v
;
(
*
ss
)
<<
v
;
...
@@ -92,7 +94,6 @@ void AppendToStream(std::stringstream *ss,
...
@@ -92,7 +94,6 @@ void AppendToStream(std::stringstream *ss,
(
*
ss
)
<<
first
<<
delimiter
;
(
*
ss
)
<<
first
<<
delimiter
;
AppendToStream
(
ss
,
delimiter
,
args
...);
AppendToStream
(
ss
,
delimiter
,
args
...);
}
}
}
// namespace
template
<
typename
...
Args
>
template
<
typename
...
Args
>
std
::
string
Concat
(
Args
...
args
)
{
std
::
string
Concat
(
Args
...
args
)
{
...
...
mace/kernels/opencl/matmul.cc
浏览文件 @
16703420
...
@@ -17,7 +17,7 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
...
@@ -17,7 +17,7 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
c_shape
=
{
A
->
dim
(
0
),
A
->
dim
(
1
),
B
->
dim
(
2
),
1
};
std
::
vector
<
index_t
>
c_shape
=
{
A
->
dim
(
0
),
A
->
dim
(
1
),
B
->
dim
(
2
),
1
};
std
::
vector
<
size_t
>
c_image_shape
;
std
::
vector
<
size_t
>
c_image_shape
;
CalImage2DShape
(
c_shape
,
BufferType
::
IN_OUT_HEIGHT
,
c_image_shape
);
CalImage2DShape
(
c_shape
,
BufferType
::
IN_OUT_HEIGHT
,
&
c_image_shape
);
C
->
ResizeImage
(
c_shape
,
c_image_shape
);
C
->
ResizeImage
(
c_shape
,
c_image_shape
);
const
index_t
batch
=
C
->
dim
(
0
);
const
index_t
batch
=
C
->
dim
(
0
);
...
@@ -56,7 +56,7 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
...
@@ -56,7 +56,7 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
ss
<<
"matmul_opencl_kernel_"
<<
C
->
dim
(
0
)
<<
"_"
<<
C
->
dim
(
1
)
<<
"_"
ss
<<
"matmul_opencl_kernel_"
<<
C
->
dim
(
0
)
<<
"_"
<<
C
->
dim
(
1
)
<<
"_"
<<
C
->
dim
(
2
)
<<
"_"
<<
C
->
dim
(
3
);
<<
C
->
dim
(
2
)
<<
"_"
<<
C
->
dim
(
3
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
;
}
template
struct
MatMulFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
MatMulFunctor
<
DeviceType
::
OPENCL
,
float
>;
...
...
mace/kernels/opencl/pooling_opencl.cc
浏览文件 @
16703420
...
@@ -36,12 +36,11 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
...
@@ -36,12 +36,11 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
built_options
.
emplace
(
"-DPOOL_AVG"
);
built_options
.
emplace
(
"-DPOOL_AVG"
);
}
}
kernel_
=
runtime
->
BuildKernel
(
"pooling"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"pooling"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
),
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
)};
input
->
dim
(
3
)
,
input
->
dim
(
3
)
};
std
::
vector
<
int
>
paddings
(
2
);
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
if
(
paddings_
.
empty
())
{
...
@@ -50,12 +49,14 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
...
@@ -50,12 +49,14 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
}
else
{
paddings
=
paddings_
;
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
CalcOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
}
}
std
::
vector
<
size_t
>
output_image_shape
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
...
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
16703420
...
@@ -34,7 +34,6 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -34,7 +34,6 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
kernel_
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
...
@@ -42,7 +41,7 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -42,7 +41,7 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
std
::
vector
<
size_t
>
output_image_shape
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
&
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
float
height_scale
=
float
height_scale
=
...
@@ -60,7 +59,6 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -60,7 +59,6 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out_height
));
input_shape_
=
input
->
shape
();
input_shape_
=
input
->
shape
();
}
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/slice.cc
浏览文件 @
16703420
...
@@ -24,7 +24,7 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -24,7 +24,7 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(
input
->
dim
(
2
),
output_channels
});
input
->
dim
(
2
),
output_channels
});
std
::
vector
<
size_t
>
image_shape
;
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
image_shape
);
for
(
size_t
i
=
0
;
i
<
outputs_count
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
outputs_count
;
++
i
)
{
output_list
[
i
]
->
ResizeImage
(
output_shape
,
image_shape
);
output_list
[
i
]
->
ResizeImage
(
output_shape
,
image_shape
);
}
}
...
...
mace/kernels/opencl/softmax_opencl.cc
浏览文件 @
16703420
...
@@ -33,7 +33,6 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
...
@@ -33,7 +33,6 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"softmax"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"softmax"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
input_shape_
,
logits
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
logits
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
...
mace/kernels/opencl/space_to_batch_opencl.cc
浏览文件 @
16703420
...
@@ -22,7 +22,8 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -22,7 +22,8 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
const
char
*
kernel_name
=
nullptr
;
const
char
*
kernel_name
=
nullptr
;
std
::
vector
<
size_t
>
output_image_shape
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
output_image_shape
);
if
(
b2s_
)
{
if
(
b2s_
)
{
space_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
space_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
kernel_name
=
"batch_to_space"
;
kernel_name
=
"batch_to_space"
;
...
@@ -42,7 +43,6 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -42,7 +43,6 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
DtToCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
DtToCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
kernel_
=
kernel_
=
runtime
->
BuildKernel
(
"space_to_batch"
,
kernel_name
,
built_options
);
runtime
->
BuildKernel
(
"space_to_batch"
,
kernel_name
,
built_options
);
}
}
if
(
!
IsVecEqual
(
space_shape_
,
space_tensor
->
shape
()))
{
if
(
!
IsVecEqual
(
space_shape_
,
space_tensor
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
...
mace/kernels/opencl/winograd_transform.cc
浏览文件 @
16703420
...
@@ -27,7 +27,6 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -27,7 +27,6 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
built_options
);
}
}
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
3
,
3
,
input_tensor
->
dim
(
3
),
1
};
std
::
vector
<
index_t
>
filter_shape
=
{
3
,
3
,
input_tensor
->
dim
(
3
),
1
};
...
@@ -49,7 +48,7 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -49,7 +48,7 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
if
(
!
IsVecEqual
(
input_shape_
,
input_tensor
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input_tensor
->
shape
()))
{
output_shape
=
{
16
,
input_tensor
->
dim
(
3
),
out_width
,
1
};
output_shape
=
{
16
,
input_tensor
->
dim
(
3
),
out_width
,
1
};
std
::
vector
<
size_t
>
image_shape
;
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_HEIGHT
,
image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_HEIGHT
,
&
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
...
@@ -83,7 +82,6 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -83,7 +82,6 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
const
Tensor
*
bias
,
const
Tensor
*
bias
,
Tensor
*
output_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
if
(
kernel_
.
get
()
==
nullptr
)
{
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
string
obfuscated_kernel_name
=
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_inverse_transform_2x2"
);
MACE_OBFUSCATE_SYMBOL
(
"winograd_inverse_transform_2x2"
);
...
@@ -125,7 +123,7 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -125,7 +123,7 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
input_tensor
->
dim
(
1
)};
input_tensor
->
dim
(
1
)};
std
::
vector
<
size_t
>
image_shape
;
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
const
uint32_t
round_h
=
(
height_
+
1
)
/
2
;
const
uint32_t
round_h
=
(
height_
+
1
)
/
2
;
...
...
mace/kernels/pooling.h
浏览文件 @
16703420
...
@@ -2,10 +2,13 @@
...
@@ -2,10 +2,13 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
// Copyright (c) 2017 XiaoMi All rights reserved.
//
//
#ifndef MACE_KERNELS_POOLING_H
#ifndef MACE_KERNELS_POOLING_H
_
#define MACE_KERNELS_POOLING_H
#define MACE_KERNELS_POOLING_H
_
#include <algorithm>
#include <limits>
#include <limits>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -188,4 +191,4 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
...
@@ -188,4 +191,4 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
}
// namespace kernels
}
// namespace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_POOLING_H
#endif // MACE_KERNELS_POOLING_H
_
mace/kernels/reshape.h
浏览文件 @
16703420
...
@@ -4,6 +4,8 @@
...
@@ -4,6 +4,8 @@
#ifndef MACE_KERNELS_RESHAPE_H_
#ifndef MACE_KERNELS_RESHAPE_H_
#define MACE_KERNELS_RESHAPE_H_
#define MACE_KERNELS_RESHAPE_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -20,7 +22,7 @@ struct ReshapeFunctor {
...
@@ -20,7 +22,7 @@ struct ReshapeFunctor {
Tensor
*
output
,
Tensor
*
output
,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
output
->
Resize
(
out_shape
);
output
->
Resize
(
out_shape
);
// TODO copy on write to avoid this copy.
// TODO
(liuqi):
copy on write to avoid this copy.
output
->
CopyBytes
(
input
->
raw_data
(),
input
->
size
()
*
sizeof
(
T
));
output
->
CopyBytes
(
input
->
raw_data
(),
input
->
size
()
*
sizeof
(
T
));
}
}
};
};
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
16703420
...
@@ -4,6 +4,9 @@
...
@@ -4,6 +4,9 @@
#ifndef MACE_KERNELS_RESIZE_BILINEAR_H_
#ifndef MACE_KERNELS_RESIZE_BILINEAR_H_
#define MACE_KERNELS_RESIZE_BILINEAR_H_
#define MACE_KERNELS_RESIZE_BILINEAR_H_
#include <algorithm>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -11,7 +14,6 @@
...
@@ -11,7 +14,6 @@
namespace
mace
{
namespace
mace
{
namespace
kernels
{
namespace
kernels
{
namespace
{
struct
CachedInterpolation
{
struct
CachedInterpolation
{
index_t
lower
;
// Lower source index used in the interpolation
index_t
lower
;
// Lower source index used in the interpolation
index_t
upper
;
// Upper source index used in the interpolation
index_t
upper
;
// Upper source index used in the interpolation
...
@@ -101,7 +103,6 @@ void ResizeImage(const T *images,
...
@@ -101,7 +103,6 @@ void ResizeImage(const T *images,
}
}
}
}
}
}
}
struct
ResizeBilinearFunctorBase
{
struct
ResizeBilinearFunctorBase
{
ResizeBilinearFunctorBase
(
const
std
::
vector
<
index_t
>
&
size
,
ResizeBilinearFunctorBase
(
const
std
::
vector
<
index_t
>
&
size
,
...
...
mace/kernels/slice.h
浏览文件 @
16703420
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
#ifndef MACE_KERNELS_SLICE_H_
#ifndef MACE_KERNELS_SLICE_H_
#define MACE_KERNELS_SLICE_H_
#define MACE_KERNELS_SLICE_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -16,7 +18,6 @@ namespace kernels {
...
@@ -16,7 +18,6 @@ namespace kernels {
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
SliceFunctor
{
struct
SliceFunctor
{
void
operator
()(
const
Tensor
*
input
,
void
operator
()(
const
Tensor
*
input
,
const
std
::
vector
<
Tensor
*>
&
output_list
,
const
std
::
vector
<
Tensor
*>
&
output_list
,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
...
@@ -56,15 +57,13 @@ struct SliceFunctor {
...
@@ -56,15 +57,13 @@ struct SliceFunctor {
template
<
typename
T
>
template
<
typename
T
>
struct
SliceFunctor
<
DeviceType
::
OPENCL
,
T
>
{
struct
SliceFunctor
<
DeviceType
::
OPENCL
,
T
>
{
void
operator
()(
const
Tensor
*
input
,
void
operator
()(
const
Tensor
*
input
,
const
std
::
vector
<
Tensor
*>
&
output_list
,
const
std
::
vector
<
Tensor
*>
&
output_list
,
StatsFuture
*
future
);
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
cl
::
Kernel
kernel_
;
};
};
}
// name
ps
ace kernels
}
// name
sp
ace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_SLICE_H_
#endif // MACE_KERNELS_SLICE_H_
mace/kernels/softmax.h
浏览文件 @
16703420
...
@@ -5,6 +5,10 @@
...
@@ -5,6 +5,10 @@
#ifndef MACE_KERNELS_SOFTMAX_H_
#ifndef MACE_KERNELS_SOFTMAX_H_
#define MACE_KERNELS_SOFTMAX_H_
#define MACE_KERNELS_SOFTMAX_H_
#include <algorithm>
#include <functional>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
@@ -38,7 +42,7 @@ struct SoftmaxFunctor {
...
@@ -38,7 +42,7 @@ struct SoftmaxFunctor {
for
(
index_t
c
=
1
;
c
<
num_classes
;
++
c
)
{
for
(
index_t
c
=
1
;
c
<
num_classes
;
++
c
)
{
max_value
=
std
::
max
(
max_value
,
logits_ptr
[
pos
+
c
]);
max_value
=
std
::
max
(
max_value
,
logits_ptr
[
pos
+
c
]);
}
}
// TODO: check overflow?
// TODO
(liuqi)
: check overflow?
T
sum
=
0
;
T
sum
=
0
;
for
(
index_t
c
=
0
;
c
<
num_classes
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
num_classes
;
++
c
)
{
exp_data
[
c
]
=
::
exp
((
logits_ptr
[
pos
+
c
]
-
max_value
));
exp_data
[
c
]
=
::
exp
((
logits_ptr
[
pos
+
c
]
-
max_value
));
...
@@ -60,7 +64,7 @@ struct SoftmaxFunctor<DeviceType::OPENCL, T> {
...
@@ -60,7 +64,7 @@ struct SoftmaxFunctor<DeviceType::OPENCL, T> {
std
::
vector
<
index_t
>
input_shape_
;
std
::
vector
<
index_t
>
input_shape_
;
};
};
}
// name
ps
ace kernels
}
// name
sp
ace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_SOFTMAX_H_
#endif // MACE_KERNELS_SOFTMAX_H_
mace/kernels/space_to_batch.h
浏览文件 @
16703420
...
@@ -2,8 +2,10 @@
...
@@ -2,8 +2,10 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
// Copyright (c) 2017 XiaoMi All rights reserved.
//
//
#ifndef MACE_KERNELS_CONV_2D_H_
#ifndef MACE_KERNELS_SPACE_TO_BATCH_H_
#define MACE_KERNELS_CONV_2D_H_
#define MACE_KERNELS_SPACE_TO_BATCH_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
...
@@ -60,4 +62,4 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
...
@@ -60,4 +62,4 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
}
// namespace kernels
}
// namespace kernels
}
// namespace mace
}
// namespace mace
#endif // MACE_KERNELS_
CONV_2D
_H_
#endif // MACE_KERNELS_
SPACE_TO_BATCH
_H_
mace/kernels/winograd_transform.h
浏览文件 @
16703420
...
@@ -5,6 +5,8 @@
...
@@ -5,6 +5,8 @@
#ifndef MACE_KERNELS_WINOGRAD_TRANSFORM_H_
#ifndef MACE_KERNELS_WINOGRAD_TRANSFORM_H_
#define MACE_KERNELS_WINOGRAD_TRANSFORM_H_
#define MACE_KERNELS_WINOGRAD_TRANSFORM_H_
#include <vector>
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录