Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
3a424351
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
3a424351
编写于
2月 12, 2019
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refactor: Polish the Eltwise code and add some UTs.
上级
9d3e2cc5
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
162 additions
and
49 deletions
+162
-49
mace/ops/eltwise_test.cc
mace/ops/eltwise_test.cc
+95
-0
mace/ops/opencl/cl/eltwise.cl
mace/ops/opencl/cl/eltwise.cl
+11
-11
mace/ops/opencl/image/eltwise.h
mace/ops/opencl/image/eltwise.h
+42
-24
mace/ops/ops_test_util.cc
mace/ops/ops_test_util.cc
+14
-14
未找到文件。
mace/ops/eltwise_test.cc
浏览文件 @
3a424351
...
...
@@ -14,6 +14,7 @@
#include <vector>
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/eltwise.h"
#include "mace/ops/ops_test_util.h"
...
...
@@ -531,6 +532,100 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
0
,
0
,
0
,
0
,
0
,
25
,
25
,
25
,
25
,
25
});
}
namespace {
// Runs an Eltwise op followed by a 3x3 Conv2D on the GPU and compares the
// convolution output against `output`.
//
// Graph built by this helper:
//   Input0, Input1 --Eltwise(type)--> EltOutput --Conv2D(Filter)--> Output
//
// Parameters:
//   type          Eltwise operation to run.
//   shape0/input0 Shape and data of the first Eltwise input.
//   shape1/input1 Shape and data of the second Eltwise input.
//   output_shape  Shape of the expected Conv2D output. It is also passed as
//                 the declared OutputShape of the Eltwise op.
//   output        Expected Conv2D output values.
//
// The filter is filled with ones and has shape
// {output_shape.back(), shape0.back(), 3, 3}; the convolution uses stride 1,
// SAME padding and dilation 1.
// NOTE(review): presumably the trailing convolution exists to expose stale
// values the Eltwise kernel may leave outside the valid channel range --
// confirm against the kernel change this test accompanies.
template <typename T>
void GPUOverflowTest(const ops::EltwiseType type,
                     const std::vector<index_t> &shape0,
                     const std::vector<T> &input0,
                     const std::vector<index_t> &shape1,
                     const std::vector<T> &input1,
                     const std::vector<index_t> &output_shape,
                     const std::vector<T> &output) {
  // Construct graph
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray<DeviceType::GPU, T>("Input0", shape0, input0);
  net.AddInputFromArray<DeviceType::GPU, T>("Input1", shape1, input1);

  OpDefBuilder("Eltwise", "EltwiseTest")
      .AddIntArg("T", DataTypeToEnum<T>::v())
      .Input("Input0")
      .Input("Input1")
      .AddIntArg("type", static_cast<int>(type))
      .OutputType({ops::IsLogicalType(type) ? DT_INT32 : DT_FLOAT})
      .Output("EltOutput")
      .OutputShape(output_shape)
      .Finalize(net.AddNewOperatorDef());

  // All-ones 3x3 filter. The element type follows T so that the data matches
  // the explicit <DeviceType::GPU, T> instantiation for every T, not only
  // for float.
  const index_t out_channels = output_shape.back();
  const index_t in_channels = shape0.back();
  net.AddInputFromArray<DeviceType::GPU, T>(
      "Filter",
      {out_channels, in_channels, 3, 3},
      std::vector<T>(out_channels * in_channels * 9, static_cast<T>(1)));

  OpDefBuilder("Conv2D", "Conv2D")
      .AddIntArg("T", DataTypeToEnum<T>::v())
      .Input("EltOutput")
      .Input("Filter")
      .Output("Output")
      .OutputShape(output_shape)
      .AddIntsArg("strides", {1, 1})
      .AddIntArg("padding", Padding::SAME)
      .AddIntsArg("dilations", {1, 1})
      .Finalize(net.AddNewOperatorDef());

  // Run
  net.RunOp(DeviceType::GPU);

  // Check the convolution output against the expected values.
  auto expected = net.CreateTensor<T>(output_shape, output);
  ExpectTensorNear<T>(*expected, *net.GetOutput("Output"), 1e-5);
}
}  // namespace
TEST_F(EltwiseOpTest, GPUOverflowTest) {
  // Every case feeds the Eltwise result into the helper's all-ones 3x3 SAME
  // convolution, so each expected value is the sum of the Eltwise results
  // that fall inside the border-clipped 3x3 window, over all input channels.
  using Shape = std::vector<index_t>;
  using Data = std::vector<float>;

  // 1 + 1 = 2 everywhere; 4 pixels * 2 channels -> 16.
  GPUOverflowTest<float>(
      ops::EltwiseType::SUM,
      Shape{1, 2, 2, 2}, Data(8, 1),
      Shape{1, 1, 1, 2}, Data{1, 1},
      Shape{1, 2, 2, 1}, Data{16, 16, 16, 16});

  // Batch 0: 1 - 1 = 0; batch 1: 1 - 2 = -1.
  GPUOverflowTest<float>(
      ops::EltwiseType::SUB,
      Shape{2, 2, 2, 2}, Data(16, 1),
      Shape{2, 1, 1, 2}, Data{1, 1, 2, 2},
      Shape{2, 2, 2, 1}, Data{0, 0, 0, 0, -8, -8, -8, -8});

  // 1 * 1 = 1 on a 3x2 single-channel map.
  GPUOverflowTest<float>(
      ops::EltwiseType::PROD,
      Shape{1, 3, 2, 1}, Data(6, 1),
      Shape{1, 3, 2, 1}, Data(6, 1),
      Shape{1, 3, 2, 1}, Data{4, 4, 6, 6, 4, 4});

  // 1 / 1 = 1, two batches of the 3x2 single-channel map.
  GPUOverflowTest<float>(
      ops::EltwiseType::DIV,
      Shape{2, 3, 2, 1}, Data(12, 1),
      Shape{2, 3, 2, 1}, Data(12, 1),
      Shape{2, 3, 2, 1}, Data{4, 4, 6, 6, 4, 4, 4, 4, 6, 6, 4, 4});

  // min(1, 1) = 1; 4 pixels * 2 channels -> 8.
  GPUOverflowTest<float>(
      ops::EltwiseType::MIN,
      Shape{1, 2, 2, 2}, Data(8, 1),
      Shape{1, 1, 1, 2}, Data{1, 1},
      Shape{1, 2, 2, 1}, Data{8, 8, 8, 8});

  // Batch 0: max(1, 1) = 1; batch 1: max(1, 2) = 2.
  GPUOverflowTest<float>(
      ops::EltwiseType::MAX,
      Shape{2, 2, 2, 2}, Data(16, 1),
      Shape{2, 1, 1, 2}, Data{1, 1, 2, 2},
      Shape{2, 2, 2, 1}, Data{8, 8, 8, 8, 16, 16, 16, 16});

  // -(1) = -1 on the 3x2 single-channel map.
  GPUOverflowTest<float>(
      ops::EltwiseType::NEG,
      Shape{1, 3, 2, 1}, Data(6, 1),
      Shape{1, 1, 1, 1}, Data{0},
      Shape{1, 3, 2, 1}, Data{-4, -4, -6, -6, -4, -4});

  // |-1| = 1, two batches of the 3x2 single-channel map.
  GPUOverflowTest<float>(
      ops::EltwiseType::ABS,
      Shape{2, 3, 2, 1}, Data(12, -1),
      Shape{1, 1, 1, 1}, Data{0},
      Shape{2, 3, 2, 1}, Data{4, 4, 6, 6, 4, 4, 4, 4, 6, 6, 4, 4});

  // Batch 0: (1 - 1)^2 = 0; batch 1: (1 - 2)^2 = 1.
  GPUOverflowTest<float>(
      ops::EltwiseType::SQR_DIFF,
      Shape{2, 2, 2, 2}, Data(16, 1),
      Shape{2, 1, 1, 2}, Data{1, 1, 2, 2},
      Shape{2, 2, 2, 1}, Data{0, 0, 0, 0, 8, 8, 8, 8});

  // 1 ^ 1 = 1 on the 3x2 single-channel map.
  GPUOverflowTest<float>(
      ops::EltwiseType::POW,
      Shape{1, 3, 2, 1}, Data(6, 1),
      Shape{1, 3, 2, 1}, Data(6, 1),
      Shape{1, 3, 2, 1}, Data{4, 4, 6, 6, 4, 4});

  // Batch 0: floor(1 / 1) = 1; batch 1: floor(1 / 2) = 0.
  GPUOverflowTest<float>(
      ops::EltwiseType::FLOOR_DIV,
      Shape{2, 2, 2, 2}, Data(16, 1),
      Shape{2, 1, 1, 2}, Data{1, 1, 2, 2},
      Shape{2, 2, 2, 1}, Data{8, 8, 8, 8, 0, 0, 0, 0});
}
namespace
{
template
<
typename
T
>
void
RandomTensorScalar
(
const
ops
::
EltwiseType
type
,
...
...
mace/ops/opencl/cl/eltwise.cl
浏览文件 @
3a424351
...
...
@@ -3,7 +3,7 @@
__kernel
void
eltwise
(
OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only
image2d_t
input0,
#
if
INPUT_TYPE
==
1
#
if
defined
(
INPUT_SCALAR
)
__private
const
float
value,
#
else
__read_only
image2d_t
input1,
...
...
@@ -28,14 +28,14 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
const int pos = mad24(chan_idx, width, width_idx);
DATA_TYPE4 in0 = READ_IMAGET(input0, SAMPLER, (int2)(pos, hb));
#if
INPUT_TYPE == 1
#if
defined(INPUT_SCALAR)
DATA_TYPE4 in1 = (DATA_TYPE4)(value, value, value, value);
#elif INPUT_TYPE == 2
#elif defined(INPUT_VECTOR)
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
#elif defined(INPUT_BATCH_VECTOR)
const int batch_idx = hb / height;
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx));
#elif INPUT_TYPE == 3
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
#elif INPUT_TYPE == 4
#elif defined(INPUT_TENSOR_BC_CHAN)
DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(width_idx, hb));
DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x);
#else
...
...
@@ -89,11 +89,11 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
#endif
#endif
#if
((INPUT_TYPE == 1 || INPUT_TYPE == 4) &&
\
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 ||
\
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9)) ||
\
((INPUT_TYPE != 1 || INPUT_TYPE != 4) &&
\
(ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 |
|
ELTWISE_TYPE
==
11
)
)
#if
defined(NOT_DIVISIBLE_FOUR) &&
\
((ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11)
\
|| ((defined(INPUT_SCALAR) || defined(INPUT_TENSOR_BC_CHAN)) &&
\
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 ||
\
ELTWISE_TYPE == 5 |
|
ELTWISE_TYPE
==
8
)
))
const
int
remain_channel
=
channel
-
4
*
chan_idx
;
if
(
remain_channel
<
4
)
{
switch
(
remain_channel
)
{
...
...
mace/ops/opencl/image/eltwise.h
浏览文件 @
3a424351
...
...
@@ -67,25 +67,52 @@ MaceStatus EltwiseKernel<T>::Compute(
const
Tensor
*
input1
,
Tensor
*
output
)
{
bool
swapped
=
false
;
if
(
input1
!=
nullptr
)
{
std
::
string
input1_type
=
""
;
if
(
input1
==
nullptr
)
{
input1_type
=
"INPUT_SCALAR"
;
}
else
{
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
()
||
input0
->
dim_size
()
==
1
||
input1
->
dim_size
()
==
1
)
<<
"Inputs of Eltwise op must be same shape"
;
MACE_CHECK
(
type_
!=
EltwiseType
::
EQUAL
)
<<
"Eltwise op on GPU does not support EQUAL"
;
// broadcast
if
(
input0
->
size
()
!=
input1
->
size
())
{
if
(
input0
->
size
()
<
input1
->
size
())
{
std
::
swap
(
input0
,
input1
);
swapped
=
true
;
}
if
(
input1
->
dim_size
()
==
1
)
{
MACE_CHECK
(
input0
->
dim
(
3
)
==
input1
->
dim
(
0
))
<<
"Element-Wise op support broadcast on only-channel or non-channel dimension"
;
// NOLINT(whitespace/line_length)
}
else
{
MACE_CHECK
(((
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
||
input1
->
dim
(
0
)
==
1
)
&&
input0
->
dim
(
3
)
==
input1
->
dim
(
3
)
&&
input1
->
dim
(
1
)
==
1
&&
input1
->
dim
(
2
)
==
1
)
||
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
input0
->
dim
(
1
)
==
input1
->
dim
(
1
)
&&
input0
->
dim
(
2
)
==
input1
->
dim
(
2
)
&&
input1
->
dim
(
3
)
==
1
))
<<
"Element-Wise op support broadcast on only-channel or non-channel dimension"
;
// NOLINT(whitespace/line_length)
if
(
input1
->
dim_size
()
==
1
||
(
input1
->
dim
(
0
)
==
1
&&
input1
->
dim
(
1
)
==
1
&&
input1
->
dim
(
2
)
==
1
))
{
// Tensor-Vector element wise
if
(
input0
->
dim
(
3
)
==
input1
->
dim
(
input1
->
dim_size
()
-
1
))
{
input1_type
=
"INPUT_VECTOR"
;
}
else
{
LOG
(
FATAL
)
<<
"Inputs not match the broadcast logic, "
<<
MakeString
(
input0
->
shape
())
<<
" vs "
<<
MakeString
(
input1
->
shape
());
}
}
else
{
// must be 4-D
if
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
input1
->
dim
(
1
)
==
1
&&
input1
->
dim
(
2
)
==
1
&&
input0
->
dim
(
3
)
==
input1
->
dim
(
3
))
{
input1_type
=
"INPUT_BATCH_VECTOR"
;
}
else
if
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
input0
->
dim
(
1
)
==
input1
->
dim
(
1
)
&&
input0
->
dim
(
2
)
==
input1
->
dim
(
2
)
&&
input1
->
dim
(
3
)
==
1
)
{
// broadcast on channel dimension
input1_type
=
"INPUT_TENSOR_BC_CHAN"
;
}
else
{
LOG
(
FATAL
)
<<
"Element-Wise op only support broadcast on"
" channel dimension:"
"Tensor-BatchVector(4D-[N,1,1,C]) "
"and Tensor-Tensor(4D-[N,H,W,1]). but got "
<<
MakeString
(
input0
->
shape
())
<<
" vs "
<<
MakeString
(
input1
->
shape
());
}
}
}
}
...
...
@@ -129,20 +156,11 @@ MaceStatus EltwiseKernel<T>::Compute(
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpCompatibleCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpCompatibleCLCMDDt
(
dt
));
built_options
.
emplace
(
MakeString
(
"-DELTWISE_TYPE="
,
type_
));
if
(
input1
==
nullptr
)
{
built_options
.
emplace
(
"-DINPUT_TYPE=1"
);
}
else
if
(
input0
->
size
()
!=
input1
->
size
())
{
if
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
input0
->
dim
(
1
)
==
input1
->
dim
(
1
)
&&
input0
->
dim
(
2
)
==
input1
->
dim
(
2
)
&&
input1
->
dim
(
3
)
==
1
)
{
// only broadcast on channel
built_options
.
emplace
(
"-DINPUT_TYPE=4"
);
}
else
if
(
input1
->
dim
(
0
)
==
1
||
input1
->
dim_size
()
==
1
)
{
built_options
.
emplace
(
"-DINPUT_TYPE=3"
);
}
else
{
built_options
.
emplace
(
"-DINPUT_TYPE=2"
);
}
if
(
swapped
)
built_options
.
emplace
(
"-DSWAPPED"
);
if
(
!
input1_type
.
empty
())
{
built_options
.
emplace
(
"-D"
+
input1_type
);
}
if
(
swapped
)
built_options
.
emplace
(
"-DSWAPPED"
);
if
(
channels
%
4
!=
0
)
built_options
.
emplace
(
"-DNOT_DIVISIBLE_FOUR"
);
if
(
!
coeff_
.
empty
())
built_options
.
emplace
(
"-DCOEFF_SUM"
);
MACE_RETURN_IF_ERROR
(
runtime
->
BuildKernel
(
"eltwise"
,
kernel_name
,
built_options
,
&
kernel_
));
...
...
mace/ops/ops_test_util.cc
浏览文件 @
3a424351
...
...
@@ -159,16 +159,16 @@ void OpTestContext::SetOCLImageAndBufferTestFlag() {
bool
OpsTestNet
::
Setup
(
mace
::
DeviceType
device
)
{
NetDef
net_def
;
for
(
auto
&
op_def
_
:
op_defs_
)
{
net_def
.
add_op
()
->
CopyFrom
(
op_def
_
);
for
(
auto
&
op_def
:
op_defs_
)
{
net_def
.
add_op
()
->
CopyFrom
(
op_def
);
for
(
auto
input
:
op_def
_
.
input
())
{
for
(
auto
input
:
op_def
.
input
())
{
if
(
ws_
.
GetTensor
(
input
)
!=
nullptr
&&
!
ws_
.
GetTensor
(
input
)
->
is_weight
())
{
auto
input_info
=
net_def
.
add_input_info
();
input_info
->
set_name
(
input
);
auto
data_format
=
ProtoArgHelper
::
GetOptionalArg
<
OperatorDef
,
int
>
(
op_def
_
,
"data_format"
,
DataFormat
::
DF_NONE
);
op_def
,
"data_format"
,
DataFormat
::
DF_NONE
);
input_info
->
set_data_format
(
data_format
);
auto
&
shape
=
ws_
.
GetTensor
(
input
)
->
shape
();
for
(
auto
d
:
shape
)
{
...
...
@@ -176,16 +176,16 @@ bool OpsTestNet::Setup(mace::DeviceType device) {
}
}
}
for
(
int
i
=
0
;
i
<
op_def_
.
output_size
();
++
i
)
{
ws_
.
RemoveTensor
(
op_def_
.
output
(
i
));
auto
output_info
=
net_def
.
add_output_info
(
);
output_info
->
set_name
(
op_def_
.
output
(
i
)
);
if
(
op_def_
.
output_type_size
()
==
op_def_
.
output_size
())
{
output_info
->
set_data_type
(
op_def_
.
output_type
(
i
));
}
else
{
output_info
->
set_data_type
(
DataType
::
DT_FLOAT
);
}
}
auto
op_def
=
op_defs_
.
back
();
for
(
int
i
=
0
;
i
<
op_def
.
output_size
();
++
i
)
{
ws_
.
RemoveTensor
(
op_def
.
output
(
i
)
);
auto
output_info
=
net_def
.
add_output_info
(
);
output_info
->
set_name
(
op_def
.
output
(
i
));
if
(
op_def
.
output_type_size
()
==
op_def
.
output_size
())
{
output_info
->
set_data_type
(
op_def
.
output_type
(
i
));
}
else
{
output_info
->
set_data_type
(
DataType
::
DT_FLOAT
);
}
}
MemoryOptimizer
mem_optimizer
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录