Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
cf234f2a
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
cf234f2a
编写于
3月 19, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'resize-output' into 'master'
Resize output for every run. See merge request !305
上级
0c02d436
2460a946
变更
36
隐藏空白更改
内联
并排
Showing
36 changed files
with
336 additions
and
162 deletions
+336
-162
mace/kernels/activation.h
mace/kernels/activation.h
+1
-0
mace/kernels/addn.h
mace/kernels/addn.h
+1
-0
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+1
-0
mace/kernels/bias_add.h
mace/kernels/bias_add.h
+1
-0
mace/kernels/channel_shuffle.h
mace/kernels/channel_shuffle.h
+1
-0
mace/kernels/concat.h
mace/kernels/concat.h
+1
-0
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+1
-0
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+1
-0
mace/kernels/eltwise.h
mace/kernels/eltwise.h
+1
-0
mace/kernels/fully_connected.h
mace/kernels/fully_connected.h
+1
-0
mace/kernels/opencl/activation_opencl.cc
mace/kernels/opencl/activation_opencl.cc
+5
-0
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+15
-9
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+4
-1
mace/kernels/opencl/bias_add_opencl.cc
mace/kernels/opencl/bias_add_opencl.cc
+3
-0
mace/kernels/opencl/channel_shuffle.cc
mace/kernels/opencl/channel_shuffle.cc
+8
-4
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+5
-1
mace/kernels/opencl/conv_2d_opencl.cc
mace/kernels/opencl/conv_2d_opencl.cc
+7
-4
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+5
-0
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+5
-1
mace/kernels/opencl/conv_2d_opencl_general.cc
mace/kernels/opencl/conv_2d_opencl_general.cc
+5
-1
mace/kernels/opencl/depthwise_conv_opencl.cc
mace/kernels/opencl/depthwise_conv_opencl.cc
+16
-15
mace/kernels/opencl/eltwise_opencl.cc
mace/kernels/opencl/eltwise_opencl.cc
+3
-0
mace/kernels/opencl/fully_connected_opencl.cc
mace/kernels/opencl/fully_connected_opencl.cc
+20
-7
mace/kernels/opencl/helper.h
mace/kernels/opencl/helper.h
+7
-0
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+9
-10
mace/kernels/opencl/pooling_opencl.cc
mace/kernels/opencl/pooling_opencl.cc
+32
-26
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+15
-10
mace/kernels/opencl/softmax_opencl.cc
mace/kernels/opencl/softmax_opencl.cc
+3
-0
mace/kernels/opencl/space_to_batch_opencl.cc
mace/kernels/opencl/space_to_batch_opencl.cc
+4
-0
mace/kernels/opencl/winograd_transform.cc
mace/kernels/opencl/winograd_transform.cc
+27
-19
mace/kernels/pooling.h
mace/kernels/pooling.h
+1
-0
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+1
-0
mace/kernels/softmax.h
mace/kernels/softmax.h
+1
-0
mace/kernels/space_to_batch.h
mace/kernels/space_to_batch.h
+1
-0
mace/kernels/winograd_transform.h
mace/kernels/winograd_transform.h
+2
-0
tools/wino_conv.py
tools/wino_conv.py
+122
-54
未找到文件。
mace/kernels/activation.h
浏览文件 @
cf234f2a
...
...
@@ -152,6 +152,7 @@ class ActivationFunctor<DeviceType::OPENCL, T> {
T
relux_max_limit_
;
cl
::
Kernel
kernel_
;
std
::
string
tuning_key_prefix_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/addn.h
浏览文件 @
cf234f2a
...
...
@@ -91,6 +91,7 @@ struct AddNFunctor<DeviceType::OPENCL, T> {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/batch_norm.h
浏览文件 @
cf234f2a
...
...
@@ -156,6 +156,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/bias_add.h
浏览文件 @
cf234f2a
...
...
@@ -62,6 +62,7 @@ struct BiasAddFunctor<DeviceType::OPENCL, T> {
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/channel_shuffle.h
浏览文件 @
cf234f2a
...
...
@@ -55,6 +55,7 @@ struct ChannelShuffleFunctor<DeviceType::OPENCL, T> {
cl
::
Kernel
kernel_
;
const
int
groups_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/concat.h
浏览文件 @
cf234f2a
...
...
@@ -83,6 +83,7 @@ struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase {
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/conv_2d.h
浏览文件 @
cf234f2a
...
...
@@ -401,6 +401,7 @@ struct Conv2dFunctor<DeviceType::OPENCL, T> : Conv2dFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/depthwise_conv2d.h
浏览文件 @
cf234f2a
...
...
@@ -439,6 +439,7 @@ struct DepthwiseConv2dFunctor<DeviceType::OPENCL, T>
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/eltwise.h
浏览文件 @
cf234f2a
...
...
@@ -94,6 +94,7 @@ struct EltwiseFunctor<DeviceType::OPENCL, T> : EltwiseFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/fully_connected.h
浏览文件 @
cf234f2a
...
...
@@ -90,6 +90,7 @@ struct FullyConnectedFunctor<DeviceType::OPENCL, T> : FullyConnectedBase {
cl
::
Kernel
kernel_
;
std
::
vector
<
uint32_t
>
gws_
;
std
::
vector
<
uint32_t
>
lws_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/opencl/activation_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -58,6 +58,9 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
LOG
(
FATAL
)
<<
"Unknown activation type: "
<<
activation_
;
}
kernel_
=
runtime
->
BuildKernel
(
"activation"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
int
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
if
(
activation_
==
PRELU
)
{
...
...
@@ -66,6 +69,8 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
.
setArg
(
idx
++
,
static_cast
<
float
>
(
relux_max_limit_
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
cf234f2a
...
...
@@ -32,15 +32,6 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
MACE_CHECK
(
channels
==
input_tensors
[
i
]
->
dim
(
3
));
}
std
::
vector
<
index_t
>
output_shape
=
input_tensors
[
0
]
->
shape
();
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
width_pixels
=
channel_blocks
*
width
;
const
index_t
batch_height_pixels
=
batch
*
height
;
if
(
kernel_
.
get
()
==
nullptr
)
{
if
(
input_tensors
.
size
()
>
4
)
{
MACE_NOT_IMPLEMENTED
;
...
...
@@ -55,11 +46,26 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
built_options
.
emplace
(
MakeString
(
"-DINPUT_NUM="
,
input_tensors
.
size
()));
kernel_
=
runtime
->
BuildKernel
(
"addn"
,
kernel_name
,
built_options
);
}
std
::
vector
<
index_t
>
output_shape
=
input_tensors
[
0
]
->
shape
();
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
width_pixels
=
channel_blocks
*
width
;
const
index_t
batch_height_pixels
=
batch
*
height
;
if
(
!
IsVecEqual
(
input_shape_
,
input_tensors
[
0
]
->
shape
()))
{
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
output_image_shape
);
uint32_t
idx
=
0
;
for
(
auto
input
:
input_tensors
)
{
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
}
kernel_
.
setArg
(
idx
++
,
*
(
output_tensor
->
opencl_image
()));
input_shape_
=
input_tensors
[
0
]
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -61,7 +61,8 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
=
runtime
->
BuildKernel
(
"batch_norm"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
scale
->
opencl_image
()));
...
...
@@ -73,6 +74,8 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/bias_add_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -33,10 +33,13 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"bias_add"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/channel_shuffle.cc
浏览文件 @
cf234f2a
...
...
@@ -13,9 +13,10 @@ namespace mace {
namespace
kernels
{
template
<
typename
T
>
void
ChannelShuffleFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
void
ChannelShuffleFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
output
->
ResizeLike
(
input
);
const
index_t
batch
=
input
->
dim
(
0
);
...
...
@@ -39,12 +40,15 @@ void ChannelShuffleFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *inpu
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"channel_shuffle"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
groups_
);
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
channels_per_group
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
group_channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
cf234f2a
...
...
@@ -15,6 +15,7 @@ static void Concat2(cl::Kernel *kernel,
const
Tensor
*
input0
,
const
Tensor
*
input1
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -41,6 +42,8 @@ static void Concat2(cl::Kernel *kernel,
}
*
kernel
=
runtime
->
BuildKernel
(
"concat"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input0
->
opencl_image
())));
...
...
@@ -49,6 +52,7 @@ static void Concat2(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input0
->
dim
(
3
)));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
opencl_image
())));
*
prev_input_shape
=
input0
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
...
...
@@ -142,7 +146,7 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(
switch
(
inputs_count
)
{
case
2
:
Concat2
(
&
kernel_
,
input_list
[
0
],
input_list
[
1
],
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
&
input_shape_
,
output
,
future
);
break
;
default:
if
(
divisible_four
)
{
...
...
mace/kernels/opencl/conv_2d_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -18,6 +18,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
...
...
@@ -31,6 +32,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
...
...
@@ -44,6 +46,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
...
...
@@ -57,8 +60,8 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
cl
::
Kernel
*
kernel
,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
Tensor
*
output
,
StatsFuture
*
future
);
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
input_shape
,
Tensor
*
output
,
StatsFuture
*
future
);
// Selection matrix: kernel_size x stride_size
static
const
Conv2dOpenclFunction
selector
[
5
]
=
{
Conv2dOpenclK1x1
,
nullptr
,
Conv2dOpenclK3x3
,
nullptr
,
nullptr
};
...
...
@@ -97,11 +100,11 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
auto
conv2d_func
=
selector
[
kernel_h
-
1
];
conv2d_func
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
future
);
}
else
{
Conv2dOpencl
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
future
);
}
}
...
...
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
cf234f2a
...
...
@@ -20,6 +20,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -68,6 +69,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_1x1"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
...
...
@@ -83,6 +86,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel
->
setArg
(
idx
++
,
stride
);
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
cf234f2a
...
...
@@ -22,6 +22,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -62,7 +63,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_3x3"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
...
...
@@ -81,6 +83,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
kernel
->
setArg
(
idx
++
,
dilations
[
1
]);
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/conv_2d_opencl_general.cc
浏览文件 @
cf234f2a
...
...
@@ -22,6 +22,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -62,7 +63,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
...
...
@@ -83,6 +85,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
kernel
->
setArg
(
idx
++
,
dilations
[
1
]);
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/depthwise_conv_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -21,6 +21,7 @@ void DepthwiseConv2d(cl::Kernel *kernel,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
output
->
dim
(
0
);
...
...
@@ -35,17 +36,6 @@ void DepthwiseConv2d(cl::Kernel *kernel,
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
if
(
kernel
->
get
()
==
nullptr
)
{
const
index_t
input_batch
=
input
->
dim
(
0
);
const
index_t
input_height
=
input
->
dim
(
1
);
const
index_t
input_width
=
input
->
dim
(
2
);
const
index_t
filter_height
=
filter
->
dim
(
0
);
const
index_t
filter_width
=
filter
->
dim
(
1
);
MACE_CHECK
(
multiplier
==
1
,
"Multiplier > 1 not supported"
);
MACE_CHECK
(
multiplier
*
input_channels
==
channels
);
MACE_CHECK
(
filter
->
dim
(
2
)
==
input_channels
,
filter
->
dim
(
2
),
"!="
,
input_channels
);
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"depthwise_conv2d"
);
...
...
@@ -80,6 +70,18 @@ void DepthwiseConv2d(cl::Kernel *kernel,
*
kernel
=
runtime
->
BuildKernel
(
"depthwise_conv2d"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
const
index_t
input_batch
=
input
->
dim
(
0
);
const
index_t
input_height
=
input
->
dim
(
1
);
const
index_t
input_width
=
input
->
dim
(
2
);
const
index_t
filter_height
=
filter
->
dim
(
0
);
const
index_t
filter_width
=
filter
->
dim
(
1
);
MACE_CHECK
(
multiplier
==
1
,
"Multiplier > 1 not supported"
);
MACE_CHECK
(
multiplier
*
input_channels
==
channels
);
MACE_CHECK
(
filter
->
dim
(
2
)
==
input_channels
,
filter
->
dim
(
2
),
"!="
,
input_channels
);
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
...
...
@@ -102,6 +104,7 @@ void DepthwiseConv2d(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
short
>
(
dilations
[
0
]));
kernel
->
setArg
(
idx
++
,
static_cast
<
short
>
(
dilations
[
1
]));
}
*
prev_input_shape
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
@@ -120,9 +123,7 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
typedef
void
(
*
Conv2dOpenclFunction
)(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
);
index_t
kernel_h
=
filter
->
dim
(
2
);
index_t
kernel_w
=
filter
->
dim
(
3
);
if
(
strides_
[
0
]
!=
strides_
[
1
])
{
...
...
@@ -163,7 +164,7 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
DepthwiseConv2d
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
DataTypeToEnum
<
T
>::
value
,
&
input_shape_
,
output
,
future
);
}
template
struct
DepthwiseConv2dFunctor
<
DeviceType
::
OPENCL
,
float
>;
...
...
mace/kernels/opencl/eltwise_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -36,6 +36,8 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
if
(
!
coeff_
.
empty
())
built_options
.
emplace
(
"-DCOEFF_SUM"
);
kernel_
=
runtime
->
BuildKernel
(
"eltwise"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input0
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input0
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input1
->
opencl_image
()));
...
...
@@ -44,6 +46,7 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
kernel_
.
setArg
(
idx
++
,
coeff_
[
1
]);
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input0
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
...
...
mace/kernels/opencl/fully_connected_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -13,6 +13,7 @@ void FCWXKernel(cl::Kernel *kernel,
const
Tensor
*
input
,
const
Tensor
*
weight
,
const
Tensor
*
bias
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
const
ActivationType
activation
,
std
::
vector
<
uint32_t
>
&
gws
,
...
...
@@ -67,6 +68,11 @@ void FCWXKernel(cl::Kernel *kernel,
const
uint32_t
inter_local_blks
=
kwg_size
/
(
gws
[
0
]
*
gws
[
1
]);
lws
=
{
gws
[
0
],
gws
[
1
],
inter_local_blks
};
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output
->
dim
(
3
));
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
weight
->
opencl_image
()));
...
...
@@ -80,6 +86,10 @@ void FCWXKernel(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
input
->
dim
(
3
))));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
output_blocks
));
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
gws
[
2
]
=
static_cast
<
uint32_t
>
(
batch
*
output_blocks
);
*
prev_input_shape
=
input
->
shape
();
}
cl
::
Event
event
;
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
...
...
@@ -103,6 +113,7 @@ void FCWTXKernel(cl::Kernel *kernel,
const
Tensor
*
input
,
const
Tensor
*
weight
,
const
Tensor
*
bias
,
std
::
vector
<
index_t
>
*
prev_input_shape
,
Tensor
*
output
,
const
ActivationType
activation
,
std
::
vector
<
uint32_t
>
&
gws
,
...
...
@@ -141,6 +152,9 @@ void FCWTXKernel(cl::Kernel *kernel,
*
kernel
=
runtime
->
BuildKernel
(
"fully_connected"
,
kernel_name
,
built_options
);
lws
=
{
16
,
64
,
1
};
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
weight
->
opencl_image
()));
...
...
@@ -155,14 +169,13 @@ void FCWTXKernel(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
output_size
=
output
->
dim
(
3
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output_size
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output
->
dim
(
3
));
gws
=
{
static_cast
<
uint32_t
>
(
batch
),
static_cast
<
uint32_t
>
(
output_blocks
),
};
lws
=
{
16
,
64
,
1
};
*
prev_input_shape
=
input
->
shape
();
}
std
::
stringstream
ss
;
...
...
@@ -185,11 +198,11 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
if
(
weight_type_
==
BufferType
::
WEIGHT_HEIGHT
)
{
FCWTXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
output
,
FCWTXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
}
else
{
FCWXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
FCWXKernel
<
T
>
(
&
kernel_
,
input
,
weight
,
bias
,
&
input_shape_
,
output
,
activation_
,
gws_
,
lws_
,
relux_max_limit_
,
future
);
}
};
...
...
mace/kernels/opencl/helper.h
浏览文件 @
cf234f2a
...
...
@@ -71,6 +71,13 @@ inline bool LimitKernelTime() {
return
flag
!=
nullptr
&&
strlen
(
flag
)
==
1
&&
flag
[
0
]
==
'1'
;
}
// Reports whether two vectors contain the same sequence of elements.
//
// The vectors are equal when they have the same length and every pair of
// corresponding elements compares equal with operator==. Two empty vectors
// are equal.
template <typename T>
bool IsVecEqual(const std::vector<T> &input0, const std::vector<T> &input1) {
  // std::vector's operator== performs exactly the size check followed by an
  // element-wise comparison.
  return input0 == input1;
}
namespace
{
template
<
typename
T
>
void
AppendToStream
(
std
::
stringstream
*
ss
,
const
std
::
string
&
delimiter
,
T
v
)
{
...
...
mace/kernels/opencl/matmul.cc
浏览文件 @
cf234f2a
...
...
@@ -36,17 +36,16 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *A,
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"matmul"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
A
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
B
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
C
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
A
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
A
->
dim
(
2
))));
}
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
A
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
B
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
C
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
A
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
RoundUpDiv4
(
A
->
dim
(
2
))));
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_blocks
),
...
...
mace/kernels/opencl/pooling_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -17,31 +17,6 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
StatsFuture
*
future
)
{
MACE_CHECK
(
dilations_
[
0
]
==
1
&&
dilations_
[
1
]
==
1
)
<<
"Pooling opencl kernel not support dilation yet"
;
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
),
input
->
dim
(
3
)};
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
kernels
::
CalcNHWCPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
,
strides_
,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
}
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
index_t
batch
=
output
->
dim
(
0
);
index_t
out_height
=
output
->
dim
(
1
);
index_t
out_width
=
output
->
dim
(
2
);
index_t
channels
=
output
->
dim
(
3
);
index_t
channel_blocks
=
(
channels
+
3
)
/
4
;
if
(
kernel_
.
get
()
==
nullptr
)
{
const
DataType
dt
=
DataTypeToEnum
<
T
>::
value
;
...
...
@@ -62,18 +37,49 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
kernel_
=
runtime
->
BuildKernel
(
"pooling"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
),
input
->
dim
(
3
)};
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
kernels
::
CalcNHWCPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
,
strides_
,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
}
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out
_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out
put
->
dim
(
1
)
));
kernel_
.
setArg
(
idx
++
,
paddings
[
0
]
/
2
);
kernel_
.
setArg
(
idx
++
,
paddings
[
1
]
/
2
);
kernel_
.
setArg
(
idx
++
,
strides_
[
0
]);
kernel_
.
setArg
(
idx
++
,
kernels_
[
0
]);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
index_t
batch
=
output
->
dim
(
0
);
index_t
out_height
=
output
->
dim
(
1
);
index_t
out_width
=
output
->
dim
(
2
);
index_t
channels
=
output
->
dim
(
3
);
index_t
channel_blocks
=
(
channels
+
3
)
/
4
;
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
batch
*
out_height
),
...
...
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -25,6 +25,18 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
const
index_t
out_width
=
out_width_
;
if
(
kernel_
.
get
()
==
nullptr
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"resize_bilinear_nocache"
);
built_options
.
emplace
(
"-Dresize_bilinear_nocache="
+
kernel_name
);
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
std
::
vector
<
index_t
>
output_shape
{
batch
,
out_height
,
out_width
,
channels
};
...
...
@@ -38,16 +50,6 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
std
::
string
kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"resize_bilinear_nocache"
);
built_options
.
emplace
(
"-Dresize_bilinear_nocache="
+
kernel_name
);
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
...
...
@@ -56,6 +58,9 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out_height
));
input_shape_
=
input
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/kernels/opencl/softmax_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -34,11 +34,14 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"softmax"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
logits
->
shape
()))
{
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
logits
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
channels
));
kernel_
.
setArg
(
idx
++
,
remain_channels
);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
logits
->
shape
();
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
...
...
mace/kernels/opencl/space_to_batch_opencl.cc
浏览文件 @
cf234f2a
...
...
@@ -43,6 +43,8 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
=
runtime
->
BuildKernel
(
"space_to_batch"
,
kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
space_shape_
,
space_tensor
->
shape
()))
{
uint32_t
idx
=
0
;
if
(
b2s_
)
{
kernel_
.
setArg
(
idx
++
,
*
(
batch_tensor
->
opencl_image
()));
...
...
@@ -59,6 +61,8 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
space_tensor
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch_tensor
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch_tensor
->
dim
(
2
)));
space_shape_
=
space_tensor
->
shape
();
}
const
uint32_t
chan_blk
=
RoundUpDiv4
<
uint32_t
>
(
batch_tensor
->
dim
(
3
));
...
...
mace/kernels/opencl/winograd_transform.cc
浏览文件 @
cf234f2a
...
...
@@ -14,6 +14,21 @@ namespace kernels {
template
<
typename
T
>
void
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_transform_2x2"
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-Dwinograd_transform_2x2="
+
obfuscated_kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
}
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
3
,
3
,
input_tensor
->
dim
(
3
),
1
};
std
::
vector
<
int
>
paddings
(
2
);
...
...
@@ -27,29 +42,16 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
paddings_
.
data
(),
dilations_
.
data
(),
strides_
.
data
(),
RoundType
::
FLOOR
,
output_shape
.
data
());
}
const
index_t
round_h
=
(
output_shape
[
1
]
+
1
)
/
2
;
const
index_t
round_w
=
(
output_shape
[
2
]
+
1
)
/
2
;
const
index_t
out_width
=
input_tensor
->
dim
(
0
)
*
round_h
*
round_w
;
if
(
kernel_
.
get
()
==
nullptr
)
{
if
(
!
IsVecEqual
(
input_shape_
,
input_tensor
->
shape
())
)
{
output_shape
=
{
16
,
input_tensor
->
dim
(
3
),
out_width
,
1
};
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_HEIGHT
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_transform_2x2"
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-Dwinograd_transform_2x2="
+
obfuscated_kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input_tensor
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output_tensor
->
opencl_image
()));
...
...
@@ -60,6 +62,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
0
]
/
2
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
1
]
/
2
));
input_shape_
=
input_tensor
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
...
...
@@ -79,11 +83,6 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
const
Tensor
*
bias
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
input_tensor
->
dim
(
1
)};
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
string
obfuscated_kernel_name
=
...
...
@@ -121,6 +120,13 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
}
if
(
!
IsVecEqual
(
input_shape_
,
input_tensor
->
shape
()))
{
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
input_tensor
->
dim
(
1
)};
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
const
uint32_t
round_h
=
(
height_
+
1
)
/
2
;
const
uint32_t
round_w
=
(
width_
+
1
)
/
2
;
...
...
@@ -139,6 +145,8 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_h
*
round_w
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_w
));
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
input_shape_
=
input_tensor
->
shape
();
}
const
uint32_t
gws
[
2
]
=
{
...
...
mace/kernels/pooling.h
浏览文件 @
cf234f2a
...
...
@@ -182,6 +182,7 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
cf234f2a
...
...
@@ -172,6 +172,7 @@ struct ResizeBilinearFunctor<DeviceType::OPENCL, T>
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/softmax.h
浏览文件 @
cf234f2a
...
...
@@ -57,6 +57,7 @@ struct SoftmaxFunctor<DeviceType::OPENCL, T> {
void
operator
()(
const
Tensor
*
logits
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/space_to_batch.h
浏览文件 @
cf234f2a
...
...
@@ -54,6 +54,7 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T> : SpaceToBatchFunctorBase {
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
space_shape_
;
};
}
// namespace kernels
...
...
mace/kernels/winograd_transform.h
浏览文件 @
cf234f2a
...
...
@@ -49,6 +49,7 @@ struct WinogradTransformFunctor<DeviceType::OPENCL, T>
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
struct
WinogradInverseTransformFunctorBase
{
...
...
@@ -105,6 +106,7 @@ struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T>
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
std
::
vector
<
index_t
>
input_shape_
;
};
}
// namespace kernels
...
...
tools/wino_conv.py
浏览文件 @
cf234f2a
...
...
@@ -2,22 +2,89 @@ import numpy as np
import
math
import
tensorflow
as
tf
A_T
=
np
.
array
([[
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
-
1
]]).
astype
(
np
.
float32
)
A
=
np
.
transpose
(
A_T
)
B_T
=
np
.
array
([
A_T
=
{}
A
=
{}
B_T
=
{}
B
=
{}
G
=
{}
G_T
=
{}
# f(2, 3)
A_T
[
4
]
=
np
.
array
([[
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
-
1
]]).
astype
(
np
.
float32
)
A
[
4
]
=
np
.
transpose
(
A_T
[
4
])
B_T
[
4
]
=
np
.
array
([
[
1
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
0
],
[
0
,
1
,
0
,
-
1
]
]).
astype
(
np
.
float32
)
B
=
np
.
transpose
(
B_T
)
G
=
np
.
array
([
B
[
4
]
=
np
.
transpose
(
B_T
[
4
]
)
G
[
4
]
=
np
.
array
([
[
1
,
0
,
0
],
[
0.5
,
0.5
,
0.5
],
[
0.5
,
-
0.5
,
0.5
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
=
np
.
transpose
(
G
)
G_T
[
4
]
=
np
.
transpose
(
G
[
4
])
# f(4, 3)
A_T
[
6
]
=
np
.
array
([
[
1
,
1
,
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
2
,
-
2
,
0
],
[
0
,
1
,
1
,
4
,
4
,
0
],
[
0
,
1
,
-
1
,
8
,
-
8
,
1
],
]).
astype
(
np
.
float32
)
A
[
6
]
=
np
.
transpose
(
A_T
[
6
])
B_T
[
6
]
=
np
.
array
([
[
4
,
0
,
-
5
,
0
,
1
,
0
],
[
0
,
-
4
,
-
4
,
1
,
1
,
0
],
[
0
,
4
,
-
4
,
-
1
,
1
,
0
],
[
0
,
-
2
,
-
1
,
2
,
1
,
0
],
[
0
,
2
,
-
1
,
-
2
,
1
,
0
],
[
0
,
4
,
0
,
-
5
,
0
,
1
],
]).
astype
(
np
.
float32
)
B
[
6
]
=
np
.
transpose
(
B_T
[
6
])
G
[
6
]
=
np
.
array
([
[
1
/
4.0
,
0
,
0
],
[
-
1
/
6.0
,
-
1
/
6.0
,
-
1
/
6.0
],
[
-
1
/
6.0
,
1
/
6.0
,
-
1
/
6.0
],
[
1
/
24.0
,
1
/
12.0
,
1
/
6.0
],
[
1
/
24.0
,
-
1
/
12.0
,
1
/
6.0
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
[
6
]
=
np
.
transpose
(
G
[
6
])
# f(6, 3)
A_T
[
8
]
=
np
.
array
([
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
2
,
-
2
,
1
/
2.
,
-
1
/
2.
,
0
],
[
0
,
1
,
1
,
4
,
4
,
1
/
4.
,
1
/
4.
,
0
],
[
0
,
1
,
-
1
,
8
,
-
8
,
1
/
8.
,
-
1
/
8.
,
0
],
[
0
,
1
,
1
,
16
,
16
,
1
/
16.
,
1
/
16.
,
0
],
[
0
,
1
,
-
1
,
32
,
-
32
,
1
/
32.
,
-
1
/
32.
,
1
],
]).
astype
(
np
.
float32
)
A
[
8
]
=
np
.
transpose
(
A_T
[
8
])
B_T
[
8
]
=
np
.
array
([
[
1
,
0
,
-
21
/
4.
,
0
,
21
/
4.
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
-
17
/
4.
,
-
17
/
4.
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
17
/
4.
,
-
17
/
4.
,
-
1
,
1
,
0
],
[
0
,
1
/
2.
,
1
/
4.
,
-
5
/
2.
,
-
5
/
4.
,
2
,
1
,
0
],
[
0
,
-
1
/
2.
,
1
/
4.
,
5
/
2.
,
-
5
/
4.
,
-
2
,
1
,
0
],
[
0
,
2
,
4
,
-
5
/
2.
,
-
5
,
1
/
2.
,
1
,
0
],
[
0
,
-
2
,
4
,
5
/
2.
,
-
5
,
-
1
/
2.
,
1
,
0
],
[
0
,
-
1
,
0
,
21
/
4.
,
0
,
-
21
/
4.
,
0
,
1
],
]).
astype
(
np
.
float32
)
B
[
8
]
=
np
.
transpose
(
B_T
[
8
])
G
[
8
]
=
np
.
array
([
[
1
,
0
,
0
],
[
-
2
/
9.
,
-
2
/
9.
,
-
2
/
9.
],
[
-
2
/
9.
,
2
/
9.
,
-
2
/
9.
],
[
1
/
90.
,
1
/
45.
,
2
/
45.
],
[
1
/
90.
,
-
1
/
45.
,
2
/
45.
],
[
32
/
45.
,
16
/
45.
,
8
/
45.
],
[
32
/
45.
,
-
16
/
45.
,
8
/
45.
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
[
8
]
=
np
.
transpose
(
G
[
8
])
def
output_shape
(
input_shape
,
filter_shape
):
...
...
@@ -29,55 +96,54 @@ def output_shape(input_shape, filter_shape):
return
out_shape
def
winog_conv
(
input
,
filter
):
m
=
2
r
=
3
def
winograd_conv
(
m
,
r
,
input
,
filter
):
alpha
=
m
+
r
-
1
print
'Winograd(m = %d, r = %d, tile size=%d'
%
(
m
,
r
,
alpha
)
alpha_square
=
alpha
*
alpha
input_shape
=
input
.
shape
filter_shape
=
filter
.
shape
out_shape
=
output_shape
(
input_shape
,
filter_shape
)
K
=
filter_shape
[
0
]
C
=
input_shape
[
1
]
U
=
np
.
zeros
((
K
*
16
,
C
))
U
=
np
.
zeros
((
K
*
alpha_square
,
C
))
for
k
in
range
(
K
):
for
c
in
range
(
C
):
u
=
np
.
dot
(
np
.
dot
(
G
,
filter
[
k
,
c
,
:,
:]),
G_T
)
for
i
in
range
(
4
):
for
j
in
range
(
4
)
:
U
[(
i
*
4
+
j
)
*
K
+
k
,
c
]
=
u
[
i
,
j
]
u
=
np
.
dot
(
np
.
dot
(
G
[
alpha
],
filter
[
k
,
c
,
:,
:]),
G_T
[
alpha
]
)
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
)
:
U
[(
i
*
alpha
+
j
)
*
K
+
k
,
c
]
=
u
[
i
,
j
]
print
'filter out: '
,
U
.
shape
print
U
[
0
,
0
]
U
.
astype
(
np
.
float32
).
tofile
(
"filter_out"
)
rounded_h
=
int
(
math
.
ceil
(
out_shape
[
2
]
/
2.0
))
rounded_w
=
int
(
math
.
ceil
(
out_shape
[
3
]
/
2.0
))
rounded_h
=
int
(
math
.
ceil
(
out_shape
[
2
]
/
(
m
*
1.0
)
))
rounded_w
=
int
(
math
.
ceil
(
out_shape
[
3
]
/
(
m
*
1.0
)
))
P
=
input_shape
[
0
]
*
rounded_h
*
rounded_w
V
=
np
.
zeros
((
C
*
16
,
P
))
V
=
np
.
zeros
((
C
*
alpha_square
,
P
))
for
p
in
range
(
P
):
for
c
in
range
(
C
):
n
=
p
/
(
rounded_w
*
rounded_h
)
t
=
p
%
(
rounded_h
*
rounded_w
)
h_idx
=
t
/
rounded_w
w_idx
=
t
%
rounded_w
h_start
=
h_idx
*
2
w_start
=
w_idx
*
2
h_end
=
min
(
h_start
+
4
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
4
,
input_shape
[
3
])
d
=
np
.
zeros
((
4
,
4
))
d
[
0
:
h_end
-
h_start
,
0
:
w_end
-
w_start
]
=
input
[
n
,
c
,
h_start
:
h_end
,
w_start
:
w_end
]
v
=
np
.
dot
(
np
.
dot
(
B_T
,
d
),
B
)
for
i
in
range
(
4
):
for
j
in
range
(
4
):
V
[(
i
*
4
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
tmp
=
V
.
reshape
(
16
,
C
,
P
,
1
)
h_start
=
h_idx
*
m
w_start
=
w_idx
*
m
h_end
=
min
(
h_start
+
alpha
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
alpha
,
input_shape
[
3
])
d
=
np
.
zeros
((
alpha
,
alpha
))
d
[
0
:
h_end
-
h_start
,
0
:
w_end
-
w_start
]
=
\
input
[
n
,
c
,
h_start
:
h_end
,
w_start
:
w_end
]
v
=
np
.
dot
(
np
.
dot
(
B_T
[
alpha
],
d
),
B
[
alpha
])
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
V
[(
i
*
alpha
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
tmp
=
V
.
reshape
(
alpha_square
,
C
,
P
,
1
)
print
'input out: '
,
tmp
.
shape
tmp
.
astype
(
np
.
float32
).
tofile
(
"C"
)
M
=
np
.
zeros
((
16
*
K
,
P
))
for
i
in
range
(
alpha
*
alpha
):
M
=
np
.
zeros
((
alpha_square
*
K
,
P
))
for
i
in
range
(
alpha
_square
):
u
=
U
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
v
=
V
[
i
*
C
:
(
i
+
1
)
*
C
,
:]
M
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
=
np
.
dot
(
u
,
v
)
...
...
@@ -87,17 +153,17 @@ def winog_conv(input, filter):
res
=
np
.
zeros
((
out_shape
[
0
],
out_shape
[
2
],
out_shape
[
3
],
out_shape
[
1
]))
for
k
in
range
(
K
):
for
b
in
range
(
P
):
m
=
np
.
zeros
((
4
,
4
))
for
i
in
range
(
4
):
for
j
in
range
(
4
):
m
[
i
][
j
]
=
M
[(
i
*
4
+
j
)
*
K
+
k
,
b
]
y
=
np
.
dot
(
np
.
dot
(
A_T
,
m
),
A
)
for
i
in
range
(
2
):
for
j
in
range
(
2
):
tm
=
np
.
zeros
((
alpha
,
alpha
))
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
tm
[
i
][
j
]
=
M
[(
i
*
alpha
+
j
)
*
K
+
k
,
b
]
y
=
np
.
dot
(
np
.
dot
(
A_T
[
alpha
],
tm
),
A
[
alpha
]
)
for
i
in
range
(
m
):
for
j
in
range
(
m
):
n
=
b
/
(
rounded_h
*
rounded_w
)
t
=
b
%
(
rounded_h
*
rounded_w
)
p
=
(
t
/
rounded_w
)
*
2
+
i
q
=
(
t
%
rounded_w
)
*
2
+
j
p
=
(
t
/
rounded_w
)
*
m
+
i
q
=
(
t
%
rounded_w
)
*
m
+
j
if
p
>=
out_shape
[
2
]
or
q
>=
out_shape
[
3
]:
continue
res
[
n
,
p
,
q
,
k
]
=
y
[
i
,
j
]
...
...
@@ -115,25 +181,27 @@ def tf_conv(input, filter):
def
main
():
input
=
np
.
random
.
random
([
7
,
61
,
71
,
31
]).
astype
(
np
.
float32
)
input
=
np
.
random
.
random
([
5
,
23
,
29
,
15
]).
astype
(
np
.
float32
)
# input = np.fromfile(file="A", dtype=np.float32)
# input = input.reshape(1, 3, 3, 5)
print
'input shape: '
,
input
.
shape
input
.
tofile
(
"A"
)
filter
=
np
.
random
.
random
([
3
,
3
,
31
,
31
]).
astype
(
np
.
float32
)
#
input.tofile("A")
filter
=
np
.
random
.
random
([
3
,
3
,
15
,
13
]).
astype
(
np
.
float32
)
tf_out
=
tf_conv
(
input
,
filter
)
input
=
input
.
transpose
((
0
,
3
,
1
,
2
))
filter
=
filter
.
transpose
((
3
,
2
,
0
,
1
))
print
'filter shape: '
,
filter
.
shape
filter
.
tofile
(
"filter_in"
)
winog_out
=
winog_conv
(
input
,
filter
)
res
=
np
.
allclose
(
tf_out
,
winog_out
)
if
res
:
print
"=========Pass========="
else
:
print
"=========Failed========="
print
"TF: "
,
tf_out
print
"Winograd: "
,
winog_out
# filter.tofile("filter_in")
for
i
in
[
2
,
4
,
6
]:
print
"==========f(%d,3)=========="
%
i
winograd_out
=
winograd_conv
(
i
,
3
,
input
,
filter
)
res
=
np
.
allclose
(
tf_out
,
winograd_out
)
if
res
:
print
"=========Pass========="
else
:
print
"=========Failed======="
print
"TF: "
,
tf_out
print
"Winograd: "
,
winograd_out
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录