Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
08533627
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
08533627
编写于
1月 26, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Rebase winograd convolution code and rename gemm to matmul.
上级
bcd624f8
变更
26
显示空白变更内容
内联
并排
Showing
26 changed file
with
165 addition
and
178 deletion
+165
-178
mace/core/operator.cc
mace/core/operator.cc
+2
-2
mace/kernels/matmul.h
mace/kernels/matmul.h
+5
-5
mace/kernels/opencl/activation_opencl.cc
mace/kernels/opencl/activation_opencl.cc
+1
-1
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+1
-1
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+1
-1
mace/kernels/opencl/cl/matmul.cl
mace/kernels/opencl/cl/matmul.cl
+8
-9
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+1
-1
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+1
-1
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+1
-1
mace/kernels/opencl/conv_2d_opencl_general.cc
mace/kernels/opencl/conv_2d_opencl_general.cc
+1
-1
mace/kernels/opencl/depthwise_conv_opencl.cc
mace/kernels/opencl/depthwise_conv_opencl.cc
+2
-2
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+2
-2
mace/kernels/opencl/helper.h
mace/kernels/opencl/helper.h
+2
-2
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+75
-0
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+1
-1
mace/kernels/opencl/softmax_opencl.cc
mace/kernels/opencl/softmax_opencl.cc
+1
-1
mace/kernels/opencl/space_to_batch_opencl.cc
mace/kernels/opencl/space_to_batch_opencl.cc
+1
-1
mace/kernels/opencl/winograd_transform.cc
mace/kernels/opencl/winograd_transform.cc
+7
-94
mace/ops/depthwise_conv2d_test.cc
mace/ops/depthwise_conv2d_test.cc
+6
-6
mace/ops/depthwise_conv_2d_benchmark.cc
mace/ops/depthwise_conv_2d_benchmark.cc
+1
-1
mace/ops/matmul.cc
mace/ops/matmul.cc
+8
-8
mace/ops/matmul.h
mace/ops/matmul.h
+7
-7
mace/ops/matmul_benchmark.cc
mace/ops/matmul_benchmark.cc
+12
-12
mace/ops/matmul_test.cc
mace/ops/matmul_test.cc
+14
-14
mace/ops/winograd_convolution_test.cc
mace/ops/winograd_convolution_test.cc
+3
-3
mace/utils/tuner.h
mace/utils/tuner.h
+1
-1
未找到文件。
mace/core/operator.cc
浏览文件 @
08533627
...
...
@@ -77,7 +77,7 @@ extern void Register_Pooling(OperatorRegistry *op_registry);
extern
void
Register_ResizeBilinear
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Softmax
(
OperatorRegistry
*
op_registry
);
extern
void
Register_SpaceToBatchND
(
OperatorRegistry
*
op_registry
);
extern
void
Register_
GEMM
(
OperatorRegistry
*
op_registry
);
extern
void
Register_
MatMul
(
OperatorRegistry
*
op_registry
);
extern
void
Register_WinogradTransform
(
OperatorRegistry
*
op_registry
);
extern
void
Register_WinogradInverseTransform
(
OperatorRegistry
*
op_registry
);
...
...
@@ -100,7 +100,7 @@ OperatorRegistry::OperatorRegistry() {
Register_ResizeBilinear
(
this
);
Register_Softmax
(
this
);
Register_SpaceToBatchND
(
this
);
Register_
GEMM
(
this
);
Register_
MatMul
(
this
);
Register_WinogradTransform
(
this
);
Register_WinogradInverseTransform
(
this
);
}
...
...
mace/kernels/
gemm
.h
→
mace/kernels/
matmul
.h
浏览文件 @
08533627
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_KERNELS_
GEMM
_H_
#define MACE_KERNELS_
GEMM
_H_
#ifndef MACE_KERNELS_
MATMUL
_H_
#define MACE_KERNELS_
MATMUL
_H_
#include "mace/core/future.h"
#include "mace/core/tensor.h"
...
...
@@ -13,7 +13,7 @@ namespace kernels {
template
<
DeviceType
D
,
typename
T
>
struct
GEMM
Functor
{
struct
MatMul
Functor
{
void
operator
()(
const
Tensor
*
A
,
const
Tensor
*
B
,
Tensor
*
C
,
...
...
@@ -53,7 +53,7 @@ struct GEMMFunctor {
template
<
typename
T
>
struct
GEMM
Functor
<
DeviceType
::
OPENCL
,
T
>
{
struct
MatMul
Functor
<
DeviceType
::
OPENCL
,
T
>
{
void
operator
()(
const
Tensor
*
A
,
const
Tensor
*
B
,
Tensor
*
C
,
...
...
@@ -63,4 +63,4 @@ struct GEMMFunctor<DeviceType::OPENCL, T> {
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_
GEMM
_H_
#endif // MACE_KERNELS_
MATMUL
_H_
mace/kernels/opencl/activation_opencl.cc
浏览文件 @
08533627
...
...
@@ -63,7 +63,7 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
string
tuning_key
=
Concat
(
"relu_opencl_kernel_"
,
activation_
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
));
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
08533627
...
...
@@ -48,7 +48,7 @@ static void AddN(const std::vector<const Tensor *> &input_tensors,
static_cast
<
uint32_t
>
(
width_pixels
),
static_cast
<
uint32_t
>
(
batch_height_pixels
)
};
std
::
vector
<
uint32_t
>
lws
=
{
64
,
16
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
64
,
16
,
1
};
std
::
stringstream
ss
;
ss
<<
"addn_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
08533627
...
...
@@ -83,7 +83,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
string
tuning_key
=
Concat
(
"batch_norm_opencl_kernel_"
,
activation_
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
),
folded_constant_
);
...
...
mace/kernels/opencl/cl/
gemm
.cl
→
mace/kernels/opencl/cl/
matmul
.cl
浏览文件 @
08533627
#
include
<common.h>
//
C
=
A
*
B
__kernel
void
gemm
(
__read_only
image2d_t
A,
__kernel
void
matmul
(
__read_only
image2d_t
A,
__read_only
image2d_t
B,
__write_only
image2d_t
C,
__private
const
int
M,
...
...
@@ -17,7 +17,6 @@ __kernel void gemm(__read_only image2d_t A,
const
int
bm
=
mad24
(
batch,
M,
ty
<<
2
)
;
const
int
bk
=
mul24
(
batch,
k_blocks
)
;
float4
a0,
a1,
a2,
a3
;
float4
b0,
b1,
b2,
b3
;
float4
c0
=
0
,
c1
=
0
,
c2
=
0
,
c3
=
0
;
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
08533627
...
...
@@ -50,7 +50,7 @@ static void Concat2(const Tensor *input0,
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
batch
*
height
),
};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"concat_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
...
...
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
08533627
...
...
@@ -96,7 +96,7 @@ void Conv1x1(const Tensor *input,
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
15
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
15
,
8
,
1
};
std
::
string
tuning_key
=
Concat
(
"conv2d_1x1_opencl_kernel_"
,
activation
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
));
...
...
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
08533627
...
...
@@ -94,7 +94,7 @@ static void Conv2d3x3S12(const Tensor *input,
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
4
,
15
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
4
,
15
,
8
,
1
};
std
::
string
tuning_key
=
Concat
(
"conv2d_3x3_opencl_kernel_"
,
activation
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
));
...
...
mace/kernels/opencl/conv_2d_opencl_general.cc
浏览文件 @
08533627
...
...
@@ -97,7 +97,7 @@ void Conv2dOpencl(const Tensor *input,
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
string
tuning_key
=
Concat
(
"conv2d_general_opencl_kernel_"
,
activation
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
));
...
...
mace/kernels/opencl/depthwise_conv_opencl.cc
浏览文件 @
08533627
...
...
@@ -106,7 +106,7 @@ void DepthwiseConv2d(const Tensor *input, // NHWC
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
string
tuning_key
=
Concat
(
"depthwise_conv2d_ocl_kernel_"
,
activation
,
batch
,
height
,
width
,
channels
,
multiplier
);
TuningOrRun3DKernel
(
dw_conv2d_kernel
,
tuning_key
,
gws
,
lws
,
future
);
...
...
@@ -150,7 +150,7 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
padding_
,
output_shape
.
data
(),
paddings
.
data
());
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT
_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
DepthwiseConv2d
(
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
...
...
mace/kernels/opencl/helper.cc
浏览文件 @
08533627
...
...
@@ -158,7 +158,7 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) {
void
TuningOrRun3DKernel
(
cl
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
StatsFuture
*
future
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
kernel
);
...
...
@@ -255,7 +255,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
void
TuningOrRun2DKernel
(
cl
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
StatsFuture
*
future
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
kernel
);
...
...
mace/kernels/opencl/helper.h
浏览文件 @
08533627
...
...
@@ -44,14 +44,14 @@ std::string DtToUpstreamCLDt(const DataType dt);
void
TuningOrRun3DKernel
(
cl
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
StatsFuture
*
future
);
void
TuningOrRun2DKernel
(
cl
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
std
::
vector
<
uint32_t
>
&
lws
,
const
std
::
vector
<
uint32_t
>
&
lws
,
StatsFuture
*
future
);
inline
void
SetFuture
(
StatsFuture
*
future
,
const
cl
::
Event
&
event
)
{
...
...
mace/kernels/opencl/
gemm
.cc
→
mace/kernels/opencl/
matmul
.cc
浏览文件 @
08533627
...
...
@@ -2,7 +2,7 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/
gemm
.h"
#include "mace/kernels/
matmul
.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/tuner.h"
...
...
@@ -11,7 +11,7 @@ namespace mace {
namespace
kernels
{
template
<
typename
T
>
void
GEMM
Functor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
void
MatMul
Functor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
A
,
const
Tensor
*
B
,
Tensor
*
C
,
...
...
@@ -32,87 +32,44 @@ void GEMMFunctor<DeviceType::OPENCL, T>::operator()(
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"
gemm
"
);
built_options
.
emplace
(
"-D
gemm
="
+
kernel_name
);
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"
matmul
"
);
built_options
.
emplace
(
"-D
matmul
="
+
kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
auto
gemm_kernel
=
runtime
->
BuildKernel
(
"gemm
"
,
kernel_name
,
built_options
);
auto
matmul_kernel
=
runtime
->
BuildKernel
(
"matmul
"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
gemm
_kernel
.
setArg
(
idx
++
,
matmul
_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
A
->
buffer
())));
gemm
_kernel
.
setArg
(
idx
++
,
matmul
_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
B
->
buffer
())));
gemm
_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
C
->
buffer
())));
gemm
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
gemm
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
gemm
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
A
->
dim
(
2
)));
gemm
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height_blocks
));
gemm
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
A
->
dim
(
2
))));
matmul
_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
C
->
buffer
())));
matmul
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
matmul
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
matmul
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
A
->
dim
(
2
)));
matmul
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height_blocks
));
matmul
_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
A
->
dim
(
2
))));
const
uint32_t
gws
[
3
]
=
{
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height_blocks
*
batch
),
};
const
std
::
vector
<
uint32_t
>
lws
=
{
16
,
64
};
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
gemm_kernel
);
auto
params_generator
=
[
&
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
std
::
vector
<
uint32_t
>
local_ws
(
2
,
0
);
local_ws
[
0
]
=
std
::
min
<
uint32_t
>
(
width_blocks
,
kwg_size
);
local_ws
[
1
]
=
std
::
min
<
uint32_t
>
(
height_blocks
*
batch
,
kwg_size
/
local_ws
[
0
]);
return
{{
local_ws
[
0
],
local_ws
[
1
]},
{
local_ws
[
1
],
local_ws
[
0
]},
{
kwg_size
/
4
,
4
},
{
kwg_size
/
8
,
8
},
{
kwg_size
/
16
,
16
},
{
kwg_size
/
32
,
32
},
{
kwg_size
/
64
,
64
},
{
kwg_size
/
128
,
128
},
{
kwg_size
/
256
,
256
},
{
kwg_size
/
512
,
512
},
{
kwg_size
,
1
},
{
1
,
kwg_size
}
};
};
cl
::
Event
event
;
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>&
params
)
->
cl_int
{
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
gemm_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
]),
cl
::
NDRange
(
params
[
0
],
params
[
1
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
return
error
;
};
const
std
::
vector
<
uint32_t
>
lws
=
{
16
,
64
,
1
};
std
::
stringstream
ss
;
ss
<<
"
gemm
_opencl_kernel_"
ss
<<
"
matmul
_opencl_kernel_"
<<
C
->
dim
(
0
)
<<
"_"
<<
C
->
dim
(
1
)
<<
"_"
<<
C
->
dim
(
2
)
<<
"_"
<<
C
->
dim
(
3
);
OpenCLProfilingTimer
timer
(
&
event
);
Tuner
<
uint32_t
>::
Get
()
->
template
TuneOrRun
<
cl_int
>(
ss
.
str
(),
lws
,
params_generator
,
func
,
&
timer
);
if
(
future
!=
nullptr
)
{
future
->
wait_fn
=
[
runtime
,
event
](
CallStats
*
stats
)
{
event
.
wait
();
if
(
stats
!=
nullptr
)
{
runtime
->
GetCallStats
(
event
,
stats
);
}
};
}
TuningOrRun2DKernel
(
matmul_kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
};
template
struct
GEMM
Functor
<
DeviceType
::
OPENCL
,
float
>;
struct
MatMul
Functor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
GEMM
Functor
<
DeviceType
::
OPENCL
,
half
>;
struct
MatMul
Functor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
08533627
...
...
@@ -59,7 +59,7 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
out_height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"resize_bilinear_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
...
...
mace/kernels/opencl/softmax_opencl.cc
浏览文件 @
08533627
...
...
@@ -41,7 +41,7 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"softmax_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
...
...
mace/kernels/opencl/space_to_batch_opencl.cc
浏览文件 @
08533627
...
...
@@ -61,7 +61,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
const
uint32_t
gws
[
3
]
=
{
chan_blk
,
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
2
)),
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
0
)
*
batch_tensor
->
dim
(
1
))};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
kernel_name
<<
"_"
<<
batch_tensor
->
dim
(
0
)
<<
"_"
...
...
mace/kernels/opencl/winograd_transform.cc
浏览文件 @
08533627
...
...
@@ -51,60 +51,16 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
wino_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
0
]
/
2
));
wino_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
1
]
/
2
));
const
size
_t
gws
[
2
]
=
{
static_cast
<
size_t
>
(
out_width
),
const
uint32
_t
gws
[
2
]
=
{
static_cast
<
size_t
>
(
out_width
),
static_cast
<
size_t
>
(
RoundUpDiv4
(
input_tensor
->
dim
(
3
)))};
const
std
::
vector
<
uint32_t
>
lws
=
{
128
,
8
};
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
wino_kernel
);
auto
params_generator
=
[
&
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
std
::
vector
<
uint32_t
>
local_ws
(
2
,
0
);
local_ws
[
0
]
=
std
::
min
<
uint32_t
>
(
gws
[
0
],
kwg_size
);
local_ws
[
1
]
=
std
::
min
<
uint32_t
>
(
gws
[
1
],
kwg_size
/
local_ws
[
0
]);
return
{{
local_ws
[
0
],
local_ws
[
1
]},
{
local_ws
[
1
],
local_ws
[
0
]},
{
kwg_size
/
4
,
4
},
{
kwg_size
/
8
,
8
},
{
kwg_size
/
16
,
16
},
{
kwg_size
/
32
,
32
},
{
kwg_size
/
64
,
64
},
{
kwg_size
/
128
,
128
},
{
kwg_size
/
256
,
256
},
{
kwg_size
/
512
,
512
},
{
kwg_size
,
1
},
{
1
,
kwg_size
}
};
};
cl
::
Event
event
;
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>&
params
)
->
cl_int
{
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
wino_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
]),
cl
::
NDRange
(
params
[
0
],
params
[
1
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
return
error
;
};
const
std
::
vector
<
uint32_t
>
lws
=
{
128
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"winograd_transform_kernel_"
<<
input_tensor
->
dim
(
0
)
<<
"_"
<<
input_tensor
->
dim
(
1
)
<<
"_"
<<
input_tensor
->
dim
(
2
)
<<
"_"
<<
input_tensor
->
dim
(
3
);
OpenCLProfilingTimer
timer
(
&
event
);
Tuner
<
uint32_t
>::
Get
()
->
template
TuneOrRun
<
cl_int
>(
ss
.
str
(),
lws
,
params_generator
,
func
,
&
timer
);
if
(
future
!=
nullptr
)
{
future
->
wait_fn
=
[
runtime
,
event
](
CallStats
*
stats
)
{
event
.
wait
();
if
(
stats
!=
nullptr
)
{
runtime
->
GetCallStats
(
event
,
stats
);
}
};
}
TuningOrRun2DKernel
(
wino_kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
}
template
<
typename
T
>
...
...
@@ -165,60 +121,17 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
wino_kernel
.
setArg
(
idx
++
,
relux_max_limit_
);
wino_kernel
.
setArg
(
idx
++
,
prelu_alpha_
);
const
size
_t
gws
[
2
]
=
{
static_cast
<
size_t
>
(
input_tensor
->
dim
(
2
)),
const
uint32
_t
gws
[
2
]
=
{
static_cast
<
size_t
>
(
input_tensor
->
dim
(
2
)),
static_cast
<
size_t
>
(
RoundUpDiv4
(
input_tensor
->
dim
(
1
)))};
const
std
::
vector
<
uint32_t
>
lws
=
{
128
,
8
};
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
wino_kernel
);
auto
params_generator
=
[
&
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
std
::
vector
<
uint32_t
>
local_ws
(
2
,
0
);
local_ws
[
0
]
=
std
::
min
<
uint32_t
>
(
gws
[
0
],
kwg_size
);
local_ws
[
1
]
=
std
::
min
<
uint32_t
>
(
gws
[
1
],
kwg_size
/
local_ws
[
0
]);
return
{{
local_ws
[
0
],
local_ws
[
1
]},
{
local_ws
[
1
],
local_ws
[
0
]},
{
kwg_size
/
4
,
4
},
{
kwg_size
/
8
,
8
},
{
kwg_size
/
16
,
16
},
{
kwg_size
/
32
,
32
},
{
kwg_size
/
64
,
64
},
{
kwg_size
/
128
,
128
},
{
kwg_size
/
256
,
256
},
{
kwg_size
/
512
,
512
},
{
kwg_size
,
1
},
{
1
,
kwg_size
}
};
};
cl
::
Event
event
;
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>&
params
)
->
cl_int
{
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
wino_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
]),
cl
::
NDRange
(
params
[
0
],
params
[
1
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
return
error
;
};
const
std
::
vector
<
uint32_t
>
lws
=
{
128
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"winograd_inverse_transform_kernel_"
<<
input_tensor
->
dim
(
0
)
<<
"_"
<<
input_tensor
->
dim
(
1
)
<<
"_"
<<
input_tensor
->
dim
(
2
)
<<
"_"
<<
input_tensor
->
dim
(
3
);
OpenCLProfilingTimer
timer
(
&
event
);
Tuner
<
uint32_t
>::
Get
()
->
template
TuneOrRun
<
cl_int
>(
ss
.
str
(),
lws
,
params_generator
,
func
,
&
timer
);
if
(
future
!=
nullptr
)
{
future
->
wait_fn
=
[
runtime
,
event
](
CallStats
*
stats
)
{
event
.
wait
();
if
(
stats
!=
nullptr
)
{
runtime
->
GetCallStats
(
event
,
stats
);
}
};
}
TuningOrRun2DKernel
(
wino_kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
}
template
...
...
mace/ops/depthwise_conv2d_test.cc
浏览文件 @
08533627
...
...
@@ -26,7 +26,7 @@ void SimpleValidTest() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
.1
f
,
.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
DW_CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
...
...
@@ -46,7 +46,7 @@ void SimpleValidTest() {
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
_CHANNEL
);
}
else
{
OpDefBuilder
(
"DepthwiseConv2d"
,
"DepthwiseConv2DTest"
)
...
...
@@ -129,7 +129,7 @@ void ComplexValidTest() {
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
DW_CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
...
...
@@ -149,7 +149,7 @@ void ComplexValidTest() {
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
_CHANNEL
);
}
else
{
OpDefBuilder
(
"DepthwiseConv2d"
,
"DepthwiseConv2DTest"
)
...
...
@@ -239,7 +239,7 @@ void TestNxNS12(const index_t height, const index_t width) {
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
DW_CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
...
...
@@ -259,7 +259,7 @@ void TestNxNS12(const index_t height, const index_t width) {
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"DeviceOutput"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
_CHANNEL
);
}
else
{
OpDefBuilder
(
"DepthwiseConv2d"
,
"DepthwiseConv2DTest"
)
.
Input
(
"Input"
)
...
...
mace/ops/depthwise_conv_2d_benchmark.cc
浏览文件 @
08533627
...
...
@@ -34,7 +34,7 @@ static void DepthwiseConv2d(int iters,
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
DW_CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
...
...
mace/ops/
gemm
.cc
→
mace/ops/
matmul
.cc
浏览文件 @
08533627
...
...
@@ -2,28 +2,28 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/
gemm
.h"
#include "mace/ops/
matmul
.h"
namespace
mace
{
void
Register_
GEMM
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"
GEMM
"
)
void
Register_
MatMul
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"
MatMul
"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
GEMM
Op
<
DeviceType
::
CPU
,
float
>
);
MatMul
Op
<
DeviceType
::
CPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"
GEMM
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"
MatMul
"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
GEMM
Op
<
DeviceType
::
OPENCL
,
float
>
);
MatMul
Op
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"
GEMM
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"
MatMul
"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
GEMM
Op
<
DeviceType
::
OPENCL
,
half
>
);
MatMul
Op
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/
gemm
.h
→
mace/ops/
matmul
.h
浏览文件 @
08533627
...
...
@@ -2,18 +2,18 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_
GEMM
_H_
#define MACE_OPS_
GEMM
_H_
#ifndef MACE_OPS_
MATMUL
_H_
#define MACE_OPS_
MATMUL
_H_
#include "mace/core/operator.h"
#include "mace/kernels/
gemm
.h"
#include "mace/kernels/
matmul
.h"
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
class
GEMM
Op
:
public
Operator
<
D
,
T
>
{
class
MatMul
Op
:
public
Operator
<
D
,
T
>
{
public:
GEMM
Op
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
MatMul
Op
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{}
bool
Run
(
StatsFuture
*
future
)
override
{
...
...
@@ -32,9 +32,9 @@ class GEMMOp : public Operator<D, T> {
}
private:
kernels
::
GEMM
Functor
<
D
,
T
>
functor_
;
kernels
::
MatMul
Functor
<
D
,
T
>
functor_
;
};
}
// namespace mace
#endif // MACE_OPS_
GEMM
_H_
#endif // MACE_OPS_
MATMUL
_H_
mace/ops/
gemm
_benchmark.cc
→
mace/ops/
matmul
_benchmark.cc
浏览文件 @
08533627
...
...
@@ -9,7 +9,7 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
GEMM
Benchmark
(
static
void
MatMul
Benchmark
(
int
iters
,
int
batch
,
int
height
,
int
channels
,
int
out_width
)
{
mace
::
testing
::
StopTiming
();
...
...
@@ -25,14 +25,14 @@ static void GEMMBenchmark(
BufferToImage
<
D
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
OpDefBuilder
(
"
GEMM"
,
"GEMM
BM"
)
OpDefBuilder
(
"
MatMul"
,
"MatMul
BM"
)
.
Input
(
"AImage"
)
.
Input
(
"BImage"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"
GEMM"
,
"GEMM
BM"
)
OpDefBuilder
(
"
MatMul"
,
"MatMul
BM"
)
.
Input
(
"A"
)
.
Input
(
"B"
)
.
Output
(
"Output"
)
...
...
@@ -52,19 +52,19 @@ static void GEMMBenchmark(
net
.
Sync
();
}
#define BM_
GEMM
_MACRO(N, H, C, W, TYPE, DEVICE) \
static void BM_
GEMM
_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
#define BM_
MATMUL
_MACRO(N, H, C, W, TYPE, DEVICE) \
static void BM_
MATMUL
_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
GEMM
Benchmark<DEVICE, TYPE>(iters, N, H, C, W); \
MatMul
Benchmark<DEVICE, TYPE>(iters, N, H, C, W); \
} \
BENCHMARK(BM_
GEMM
_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_
MATMUL
_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#define BM_
GEMM
(N, H, C, W, TYPE) \
BM_
GEMM
_MACRO(N, H, C, W, TYPE, OPENCL);
#define BM_
MATMUL
(N, H, C, W, TYPE) \
BM_
MATMUL
_MACRO(N, H, C, W, TYPE, OPENCL);
BM_
GEMM
(
16
,
32
,
128
,
49
,
half
);
BM_
GEMM
(
16
,
32
,
128
,
961
,
half
);
BM_
GEMM
(
16
,
32
,
128
,
3969
,
half
);
BM_
MATMUL
(
16
,
32
,
128
,
49
,
half
);
BM_
MATMUL
(
16
,
32
,
128
,
961
,
half
);
BM_
MATMUL
(
16
,
32
,
128
,
3969
,
half
);
}
// namespace mace
mace/ops/
gemm
_test.cc
→
mace/ops/
matmul
_test.cc
浏览文件 @
08533627
...
...
@@ -8,7 +8,7 @@
namespace
mace
{
class
GEMM
OpTest
:
public
OpsTestBase
{};
class
MatMul
OpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
void
Simple
(
const
std
::
vector
<
index_t
>
&
A_shape
,
...
...
@@ -29,7 +29,7 @@ void Simple(const std::vector<index_t> &A_shape,
BufferToImage
<
D
,
float
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
OpDefBuilder
(
"
GEMM"
,
"GEMM
Test"
)
OpDefBuilder
(
"
MatMul"
,
"MatMul
Test"
)
.
Input
(
"AImage"
)
.
Input
(
"BImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -41,7 +41,7 @@ void Simple(const std::vector<index_t> &A_shape,
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
}
else
{
OpDefBuilder
(
"
GEMM"
,
"GEMM
Test"
)
OpDefBuilder
(
"
MatMul"
,
"MatMul
Test"
)
.
Input
(
"A"
)
.
Input
(
"B"
)
.
Output
(
"Output"
)
...
...
@@ -57,7 +57,7 @@ void Simple(const std::vector<index_t> &A_shape,
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
GEMM
OpTest
,
SimpleCPU
)
{
TEST_F
(
MatMul
OpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
({
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
});
...
...
@@ -74,13 +74,13 @@ TEST_F(GEMMOpTest, SimpleCPU) {
}
TEST_F
(
GEMM
OpTest
,
SimpleCPUWithBatch
)
{
TEST_F
(
MatMul
OpTest
,
SimpleCPUWithBatch
)
{
Simple
<
DeviceType
::
CPU
>
({
2
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
1
,
2
,
3
,
4
,
5
,
6
},
{
2
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
1
,
2
,
3
,
4
,
5
,
6
},
{
2
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
,
22
,
28
,
49
,
64
});
}
TEST_F
(
GEMM
OpTest
,
SimpleOPENCL
)
{
TEST_F
(
MatMul
OpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
({
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
});
...
...
@@ -96,7 +96,7 @@ TEST_F(GEMMOpTest, SimpleOPENCL) {
1315
,
1430
,
1545
,
1660
,
1775
});
}
TEST_F
(
GEMM
OpTest
,
SimpleGPUWithBatch
)
{
TEST_F
(
MatMul
OpTest
,
SimpleGPUWithBatch
)
{
Simple
<
DeviceType
::
CPU
>
({
2
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
1
,
2
,
3
,
4
,
5
,
6
},
{
2
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
1
,
2
,
3
,
4
,
5
,
6
},
{
2
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
,
22
,
28
,
49
,
64
});
...
...
@@ -111,7 +111,7 @@ void Complex(const index_t batch,
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"
GEMM"
,
"GEMM
Test"
)
OpDefBuilder
(
"
MatMul"
,
"MatMul
Test"
)
.
Input
(
"A"
)
.
Input
(
"B"
)
.
Output
(
"Output"
)
...
...
@@ -136,7 +136,7 @@ void Complex(const index_t batch,
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
OpDefBuilder
(
"
GEMM"
,
"GEMM
Test"
)
OpDefBuilder
(
"
MatMul"
,
"MatMul
Test"
)
.
Input
(
"AImage"
)
.
Input
(
"BImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -155,24 +155,24 @@ void Complex(const index_t batch,
}
}
TEST_F
(
GEMM
OpTest
,
OPENCLAlignedWithoutBatch
)
{
TEST_F
(
MatMul
OpTest
,
OPENCLAlignedWithoutBatch
)
{
Complex
<
float
>
(
1
,
64
,
128
,
32
);
Complex
<
float
>
(
1
,
64
,
32
,
128
);
}
TEST_F
(
GEMM
OpTest
,
OPENCLUnAlignedWithoutBatch
)
{
TEST_F
(
MatMul
OpTest
,
OPENCLUnAlignedWithoutBatch
)
{
Complex
<
float
>
(
1
,
31
,
113
,
61
);
Complex
<
float
>
(
1
,
113
,
31
,
73
);
}
TEST_F
(
GEMM
OpTest
,
OPENCLUnAlignedWithBatch
)
{
TEST_F
(
MatMul
OpTest
,
OPENCLUnAlignedWithBatch
)
{
Complex
<
float
>
(
2
,
3
,
3
,
3
);
Complex
<
float
>
(
16
,
31
,
61
,
67
);
Complex
<
float
>
(
31
,
31
,
61
,
67
);
}
TEST_F
(
GEMM
OpTest
,
OPENCLHalfAlignedWithoutBatch
)
{
TEST_F
(
MatMul
OpTest
,
OPENCLHalfAlignedWithoutBatch
)
{
Complex
<
half
>
(
1
,
64
,
128
,
32
);
Complex
<
half
>
(
1
,
64
,
32
,
128
);
}
TEST_F
(
GEMM
OpTest
,
OPENCLHalfUnAlignedWithBatch
)
{
TEST_F
(
MatMul
OpTest
,
OPENCLHalfUnAlignedWithBatch
)
{
Complex
<
half
>
(
2
,
31
,
113
,
61
);
Complex
<
half
>
(
16
,
32
,
64
,
64
);
Complex
<
half
>
(
31
,
31
,
61
,
67
);
...
...
mace/ops/winograd_convolution_test.cc
浏览文件 @
08533627
...
...
@@ -52,7 +52,7 @@ void WinogradConvolution(const index_t batch,
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
CONV2D_
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -92,8 +92,8 @@ void WinogradConvolution(const index_t batch,
// Run on opencl
net
.
RunOp
(
D
);
//
GEMM
OpDefBuilder
(
"
GEMM"
,
"GEMM
Test"
)
//
MatMul
OpDefBuilder
(
"
MatMul"
,
"MatMul
Test"
)
.
Input
(
"WinoFilter"
)
.
Input
(
"WinoInput"
)
.
Output
(
"WinoGemm"
)
...
...
mace/utils/tuner.h
浏览文件 @
08533627
...
...
@@ -41,7 +41,7 @@ class Tuner {
template
<
typename
RetType
>
RetType
TuneOrRun
(
const
std
::
string
param_key
,
std
::
vector
<
param_type
>
&
default_param
,
const
std
::
vector
<
param_type
>
&
default_param
,
const
std
::
function
<
std
::
vector
<
std
::
vector
<
param_type
>>
()
>
&
param_generator
,
const
std
::
function
<
RetType
(
const
std
::
vector
<
param_type
>
&
,
Timer
*
,
std
::
vector
<
param_type
>
*
)
>
&
func
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录