Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
25d2ad2d
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
25d2ad2d
编写于
4月 27, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix eltwise sub and div swapped bug.
上级
f078a265
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
88 addition
and
17 deletion
+88
-17
mace/kernels/eltwise.h
mace/kernels/eltwise.h
+42
-13
mace/kernels/opencl/cl/eltwise.cl
mace/kernels/opencl/cl/eltwise.cl
+10
-2
mace/kernels/opencl/eltwise_opencl.cc
mace/kernels/opencl/eltwise_opencl.cc
+5
-2
mace/ops/eltwise_test.cc
mace/ops/eltwise_test.cc
+31
-0
未找到文件。
mace/kernels/eltwise.h
浏览文件 @
25d2ad2d
...
@@ -114,6 +114,7 @@ inline void TensorVector(const EltwiseType type,
...
@@ -114,6 +114,7 @@ inline void TensorVector(const EltwiseType type,
const
index_t
batch
,
const
index_t
batch
,
const
index_t
channel
,
const
index_t
channel
,
const
index_t
hw
,
const
index_t
hw
,
const
bool
swapped
,
float
*
output
)
{
float
*
output
)
{
switch
(
type
)
{
switch
(
type
)
{
case
SUM
:
case
SUM
:
...
@@ -129,6 +130,18 @@ inline void TensorVector(const EltwiseType type,
...
@@ -129,6 +130,18 @@ inline void TensorVector(const EltwiseType type,
}
}
break
;
break
;
case
SUB
:
case
SUB
:
if
(
swapped
)
{
#pragma omp parallel for collapse(3)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
for
(
index_t
i
=
0
;
i
<
hw
;
++
i
)
{
const
index_t
idx0
=
(
b
*
channel
+
c
)
*
hw
+
i
;
const
index_t
idx1
=
b
*
channel
+
c
;
output
[
idx0
]
=
input1
[
idx1
]
-
input0
[
idx0
];
}
}
}
}
else
{
#pragma omp parallel for collapse(3)
#pragma omp parallel for collapse(3)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
...
@@ -139,6 +152,7 @@ inline void TensorVector(const EltwiseType type,
...
@@ -139,6 +152,7 @@ inline void TensorVector(const EltwiseType type,
}
}
}
}
}
}
}
break
;
break
;
case
PROD
:
case
PROD
:
#pragma omp parallel for collapse(3)
#pragma omp parallel for collapse(3)
...
@@ -153,6 +167,18 @@ inline void TensorVector(const EltwiseType type,
...
@@ -153,6 +167,18 @@ inline void TensorVector(const EltwiseType type,
}
}
break
;
break
;
case
DIV
:
case
DIV
:
if
(
swapped
)
{
#pragma omp parallel for collapse(3)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
for
(
index_t
i
=
0
;
i
<
hw
;
++
i
)
{
const
index_t
idx0
=
(
b
*
channel
+
c
)
*
hw
+
i
;
const
index_t
idx1
=
b
*
channel
+
c
;
output
[
idx0
]
=
input1
[
idx1
]
/
input0
[
idx0
];
}
}
}
}
else
{
#pragma omp parallel for collapse(3)
#pragma omp parallel for collapse(3)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
...
@@ -163,6 +189,7 @@ inline void TensorVector(const EltwiseType type,
...
@@ -163,6 +189,7 @@ inline void TensorVector(const EltwiseType type,
}
}
}
}
}
}
}
break
;
break
;
case
MIN
:
case
MIN
:
#pragma omp parallel for collapse(3)
#pragma omp parallel for collapse(3)
...
@@ -283,12 +310,14 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
...
@@ -283,12 +310,14 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
const
Tensor
*
input1
,
const
Tensor
*
input1
,
Tensor
*
output
,
Tensor
*
output
,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
bool
swapped
=
false
;
if
(
input1
!=
nullptr
)
{
if
(
input1
!=
nullptr
)
{
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
<<
"Inputs of Eltwise op must be same shape"
;
<<
"Inputs of Eltwise op must be same shape"
;
if
(
input0
->
size
()
!=
input1
->
size
())
{
if
(
input0
->
size
()
!=
input1
->
size
())
{
if
(
input0
->
size
()
<
input1
->
size
())
{
if
(
input0
->
size
()
<
input1
->
size
())
{
std
::
swap
(
input0
,
input1
);
std
::
swap
(
input0
,
input1
);
swapped
=
true
;
}
}
MACE_CHECK
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
MACE_CHECK
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
input0
->
dim
(
1
)
==
input1
->
dim
(
1
)
&&
input0
->
dim
(
1
)
==
input1
->
dim
(
1
)
&&
...
@@ -316,7 +345,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
...
@@ -316,7 +345,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
const
index_t
channel
=
input0
->
dim
(
1
);
const
index_t
channel
=
input0
->
dim
(
1
);
const
index_t
hw
=
input0
->
dim
(
2
)
*
input0
->
dim
(
3
);
const
index_t
hw
=
input0
->
dim
(
2
)
*
input0
->
dim
(
3
);
TensorVector
(
type_
,
input0_ptr
,
input1_ptr
,
TensorVector
(
type_
,
input0_ptr
,
input1_ptr
,
batch
,
channel
,
hw
,
output_ptr
);
batch
,
channel
,
hw
,
swapped
,
output_ptr
);
}
else
{
}
else
{
if
(
!
coeff_
.
empty
()
&&
type_
==
SUM
)
{
if
(
!
coeff_
.
empty
()
&&
type_
==
SUM
)
{
#pragma omp parallel for
#pragma omp parallel for
...
...
mace/kernels/opencl/cl/eltwise.cl
浏览文件 @
25d2ad2d
...
@@ -45,11 +45,19 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS
...
@@ -45,11 +45,19 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS
out = in0 + in1;
out = in0 + in1;
#endif
#endif
#elif ELTWISE_TYPE == 1
#elif ELTWISE_TYPE == 1
#ifdef SWAPPED
out = in1 - in0;
#else
out = in0 - in1;
out = in0 - in1;
#endif
#elif ELTWISE_TYPE == 2
#elif ELTWISE_TYPE == 2
out = in0 * in1;
out = in0 * in1;
#elif ELTWISE_TYPE == 3
#elif ELTWISE_TYPE == 3
#ifdef SWAPPED
out = in1 / in0;
#else
out = in0 / in1;
out = in0 / in1;
#endif
#elif ELTWISE_TYPE == 4
#elif ELTWISE_TYPE == 4
out = fmin(in0, in1);
out = fmin(in0, in1);
#elif ELTWISE_TYPE == 5
#elif ELTWISE_TYPE == 5
...
...
mace/kernels/opencl/eltwise_opencl.cc
浏览文件 @
25d2ad2d
...
@@ -25,12 +25,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
...
@@ -25,12 +25,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
const
Tensor
*
input1
,
const
Tensor
*
input1
,
Tensor
*
output
,
Tensor
*
output
,
StatsFuture
*
future
)
{
StatsFuture
*
future
)
{
bool
swapped
=
false
;
if
(
input1
!=
nullptr
)
{
if
(
input1
!=
nullptr
)
{
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
<<
"Inputs of Eltwise op must be same shape"
;
<<
"Inputs of Eltwise op must be same shape"
;
if
(
input0
->
size
()
!=
input1
->
size
())
{
if
(
input0
->
size
()
!=
input1
->
size
())
{
if
(
input0
->
size
()
<
input1
->
size
())
{
if
(
input0
->
size
()
<
input1
->
size
())
{
std
::
swap
(
input0
,
input1
);
std
::
swap
(
input0
,
input1
);
swapped
=
true
;
}
}
MACE_CHECK
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
MACE_CHECK
(
input0
->
dim
(
0
)
==
input1
->
dim
(
0
)
&&
input1
->
dim
(
1
)
==
1
&&
input1
->
dim
(
1
)
==
1
&&
...
@@ -62,9 +64,10 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
...
@@ -62,9 +64,10 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
MakeString
(
"-DELTWISE_TYPE="
,
type_
));
built_options
.
emplace
(
MakeString
(
"-DELTWISE_TYPE="
,
type_
));
if
(
input1
==
nullptr
)
{
if
(
input1
==
nullptr
)
{
built_options
.
emplace
(
MakeString
(
"-DINPUT_TYPE=1"
)
);
built_options
.
emplace
(
"-DINPUT_TYPE=1"
);
}
else
if
(
input0
->
size
()
!=
input1
->
size
())
{
}
else
if
(
input0
->
size
()
!=
input1
->
size
())
{
built_options
.
emplace
(
MakeString
(
"-DINPUT_TYPE=2"
));
built_options
.
emplace
(
"-DINPUT_TYPE=2"
);
if
(
swapped
)
built_options
.
emplace
(
"-DSWAPPED"
);
}
}
if
(
!
coeff_
.
empty
())
built_options
.
emplace
(
"-DCOEFF_SUM"
);
if
(
!
coeff_
.
empty
())
built_options
.
emplace
(
"-DCOEFF_SUM"
);
...
...
mace/ops/eltwise_test.cc
浏览文件 @
25d2ad2d
...
@@ -238,6 +238,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
...
@@ -238,6 +238,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
0
,
0
,
0
,
0
,
0
,
5
,
5
,
5
,
5
,
5
});
{
0
,
0
,
0
,
0
,
0
,
5
,
5
,
5
,
5
,
5
});
SimpleTensorEltwise
<
DeviceType
::
CPU
,
float
>
(
kernels
::
EltwiseType
::
SUB
,
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
2
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
0
,
0
,
0
,
0
,
0
,
-
5
,
-
5
,
-
5
,
-
5
,
-
5
});
SimpleTensorEltwise
<
DeviceType
::
CPU
,
float
>
(
kernels
::
EltwiseType
::
PROD
,
SimpleTensorEltwise
<
DeviceType
::
CPU
,
float
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
1
,
3
},
{
1
,
2
,
3
},
{
1
,
1
,
1
,
3
},
{
1
,
2
,
3
},
{
1
,
2
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
...
@@ -247,6 +253,11 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
...
@@ -247,6 +253,11 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
1
,
6
,
7
,
8
,
9
,
2
});
{
1
,
2
,
3
,
4
,
1
,
6
,
7
,
8
,
9
,
2
});
SimpleTensorEltwise
<
DeviceType
::
CPU
,
float
>
(
kernels
::
EltwiseType
::
DIV
,
{
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
2
,
4
},
{
1
,
2
,
1
,
5
},
{
1
,
1
,
1
,
2
,
2
,
1
,
1
,
1
,
1
,
1
},
{
1
,
1
,
1
,
1
,
2
,
1
,
1
,
1
,
2
,
4
});
SimpleTensorEltwise
<
DeviceType
::
CPU
,
float
>
(
kernels
::
EltwiseType
::
MIN
,
SimpleTensorEltwise
<
DeviceType
::
CPU
,
float
>
(
kernels
::
EltwiseType
::
MIN
,
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
2
,
1
,
5
},
{
1
,
2
,
1
,
5
},
...
@@ -276,6 +287,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
...
@@ -276,6 +287,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
{
1
,
2
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
2
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
0
,
0
,
0
,
0
,
0
,
5
,
5
,
5
,
5
,
5
});
{
0
,
0
,
0
,
0
,
0
,
5
,
5
,
5
,
5
,
5
});
SimpleTensorEltwise
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
EltwiseType
::
SUB
,
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
2
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
0
,
0
,
0
,
0
,
0
,
-
5
,
-
5
,
-
5
,
-
5
,
-
5
});
SimpleTensorEltwise
<
DeviceType
::
OPENCL
,
float
>
(
SimpleTensorEltwise
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
EltwiseType
::
PROD
,
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
1
,
3
},
{
1
,
2
,
3
},
{
1
,
1
,
1
,
3
},
{
1
,
2
,
3
},
...
@@ -286,6 +303,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
...
@@ -286,6 +303,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
{
1
,
2
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
2
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
1
,
6
,
7
,
8
,
9
,
2
});
{
1
,
2
,
3
,
4
,
1
,
6
,
7
,
8
,
9
,
2
});
SimpleTensorEltwise
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
EltwiseType
::
DIV
,
{
1
,
1
,
1
,
5
},
{
1
,
1
,
1
,
2
,
4
},
{
1
,
2
,
1
,
5
},
{
1
,
1
,
1
,
2
,
2
,
1
,
1
,
1
,
1
,
1
},
{
1
,
1
,
1
,
1
,
2
,
1
,
1
,
1
,
2
,
4
});
SimpleTensorEltwise
<
DeviceType
::
OPENCL
,
float
>
(
SimpleTensorEltwise
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
EltwiseType
::
MIN
,
kernels
::
EltwiseType
::
MIN
,
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
1
,
2
,
3
,
4
,
5
},
...
@@ -530,6 +553,10 @@ TEST_F(EltwiseOpTest, RandomTensorVecFloat) {
...
@@ -530,6 +553,10 @@ TEST_F(EltwiseOpTest, RandomTensorVecFloat) {
{
1
,
32
,
32
,
16
},
{
1
,
1
,
1
,
16
});
{
1
,
32
,
32
,
16
},
{
1
,
1
,
1
,
16
});
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
SUB
,
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
SUB
,
{
5
,
32
,
32
,
16
},
{
5
,
1
,
1
,
16
});
{
5
,
32
,
32
,
16
},
{
5
,
1
,
1
,
16
});
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
SUB
,
{
5
,
1
,
1
,
16
},
{
5
,
32
,
32
,
16
});
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
31
,
37
,
17
},
{
1
,
1
,
1
,
17
});
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
PROD
,
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
1
,
17
},
{
1
,
31
,
37
,
17
});
{
1
,
1
,
1
,
17
},
{
1
,
31
,
37
,
17
});
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
DIV
,
RandomTensorEltwise
<
float
>
(
kernels
::
EltwiseType
::
DIV
,
...
@@ -547,8 +574,12 @@ TEST_F(EltwiseOpTest, RandomTensorVecHalf) {
...
@@ -547,8 +574,12 @@ TEST_F(EltwiseOpTest, RandomTensorVecHalf) {
{
1
,
32
,
32
,
16
},
{
1
,
1
,
1
,
16
});
{
1
,
32
,
32
,
16
},
{
1
,
1
,
1
,
16
});
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
SUB
,
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
SUB
,
{
3
,
32
,
32
,
16
},
{
3
,
1
,
1
,
16
});
{
3
,
32
,
32
,
16
},
{
3
,
1
,
1
,
16
});
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
SUB
,
{
3
,
1
,
1
,
16
},
{
3
,
32
,
32
,
16
});
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
PROD
,
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
1
,
17
},
{
1
,
31
,
37
,
17
});
{
1
,
1
,
1
,
17
},
{
1
,
31
,
37
,
17
});
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
DIV
,
{
5
,
31
,
37
,
17
},
{
5
,
1
,
1
,
17
});
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
DIV
,
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
DIV
,
{
5
,
1
,
1
,
17
},
{
5
,
31
,
37
,
17
});
{
5
,
1
,
1
,
17
},
{
5
,
31
,
37
,
17
});
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
MIN
,
RandomTensorEltwise
<
half
>
(
kernels
::
EltwiseType
::
MIN
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录