Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
1bb11f64
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1bb11f64
编写于
11月 03, 2020
作者:
L
luxuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix: fix compute error in `mvn` and `reduce` ops
N/A Signed-off-by:
N
Luxuhui
<
luxuhui@xiaomi.com
>
上级
6c9f380e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
110 addition
and
51 deletion
+110
-51
mace/ops/opencl/image/mvnorm.cc
mace/ops/opencl/image/mvnorm.cc
+1
-1
mace/ops/reduce.cc
mace/ops/reduce.cc
+93
-48
test/ccunit/mace/ops/reduce_test.cc
test/ccunit/mace/ops/reduce_test.cc
+16
-2
未找到文件。
mace/ops/opencl/image/mvnorm.cc
浏览文件 @
1bb11f64
...
...
@@ -85,7 +85,7 @@ MaceStatus MVNormKernel::DoCompute(
const
std
::
vector
<
index_t
>
mean_shape
=
{
batch
,
1
,
1
,
channels
};
std
::
vector
<
size_t
>
mean_image_shape
;
OpenCLUtil
::
CalImage2DShape
(
mean_shape
,
OpenCLBufferType
::
IN_OUT_
HEIGHT
,
OpenCLUtil
::
CalImage2DShape
(
mean_shape
,
OpenCLBufferType
::
IN_OUT_
CHANNEL
,
&
mean_image_shape
);
ScratchImageManager
*
scratch_manager
=
context
->
device
()
->
gpu_runtime
()
->
scratch_image_manager
();
...
...
mace/ops/reduce.cc
浏览文件 @
1bb11f64
...
...
@@ -17,12 +17,13 @@
#include <set>
#include <vector>
#include "mace/ops/common/reduce_type.h"
#include "mace/core/future.h"
#include "mace/core/ops/operator.h"
#include "mace/core/quantize.h"
#include "mace/core/registry/ops_registry.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/tensor.h"
#include "mace/ops/common/reduce_type.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/reduce.h"
#endif // MACE_ENABLE_OPENCL
...
...
@@ -75,9 +76,11 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
const
Tensor
*
input
=
this
->
Input
(
0
);
Tensor
*
output
=
this
->
Output
(
0
);
Simplify
(
input
);
// Use the same scale and zero point with input and output.
output
->
SetScale
(
input
->
scale
());
output
->
SetZeroPoint
(
input
->
zero_point
());
if
(
reduce_type_
!=
SUM
)
{
// Use the same scale and zero point with input and output.
output
->
SetScale
(
input
->
scale
());
output
->
SetZeroPoint
(
input
->
zero_point
());
}
output
->
Resize
(
out_shape_
);
Compute
(
context
,
input
,
output
);
return
MaceStatus
::
MACE_SUCCESS
;
...
...
@@ -139,10 +142,12 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
void
Reduce1Dims
(
const
OpContext
*
context
,
const
T
*
input
,
const
T
ensor
*
input_tensor
,
ReduceType
type
,
T
*
output
)
{
T
ensor
*
output_tensor
)
{
MACE_UNUSED
(
context
);
const
T
*
input
=
input_tensor
->
data
<
T
>
();
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
if
(
reduce_first_axis_
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
T
tmp
=
0.
f
;
...
...
@@ -183,12 +188,14 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
void
Reduce2Dims
(
const
OpContext
*
context
,
const
T
*
input
,
const
T
ensor
*
input_tensor
,
ReduceType
type
,
T
*
output
)
{
T
ensor
*
output_tensor
)
{
utils
::
ThreadPool
&
thread_pool
=
context
->
device
()
->
cpu_runtime
()
->
thread_pool
();
const
T
*
input
=
input_tensor
->
data
<
T
>
();
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
if
(
reduce_first_axis_
)
{
thread_pool
.
Compute1D
([
=
](
index_t
start
,
index_t
end
,
index_t
step
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
...
...
@@ -285,12 +292,14 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
void
Reduce3Dims
(
const
OpContext
*
context
,
const
T
*
input
,
const
T
ensor
*
input_tensor
,
ReduceType
type
,
T
*
output
)
{
T
ensor
*
output_tensor
)
{
utils
::
ThreadPool
&
thread_pool
=
context
->
device
()
->
cpu_runtime
()
->
thread_pool
();
const
T
*
input
=
input_tensor
->
data
<
T
>
();
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
if
(
reduce_first_axis_
)
{
thread_pool
.
Compute1D
([
=
](
index_t
start
,
index_t
end
,
index_t
step
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
...
...
@@ -407,8 +416,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
for
(
int
j
=
0
;
j
<
data_reshape_
[
2
];
++
j
)
{
for
(
int
k
=
0
;
k
<
data_reshape_
[
1
];
++
k
)
{
output
[
i
*
data_reshape_
[
2
]
+
j
]
+=
input
[(
i
*
data_reshape_
[
1
]
+
k
)
*
data_reshape_
[
2
]
+
j
];
input
[(
i
*
data_reshape_
[
1
]
+
k
)
*
data_reshape_
[
2
]
+
j
];
}
}
}
...
...
@@ -420,12 +428,14 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
void
Reduce4Dims
(
const
OpContext
*
context
,
const
T
*
input
,
const
T
ensor
*
input_tensor
,
ReduceType
type
,
T
*
output
)
{
T
ensor
*
output_tensor
)
{
utils
::
ThreadPool
&
thread_pool
=
context
->
device
()
->
cpu_runtime
()
->
thread_pool
();
const
T
*
input
=
input_tensor
->
data
<
T
>
();
T
*
output
=
output_tensor
->
mutable_data
<
T
>
();
if
(
reduce_first_axis_
)
{
thread_pool
.
Compute2D
([
=
](
index_t
start0
,
index_t
end0
,
index_t
step0
,
index_t
start1
,
index_t
end1
,
index_t
step1
)
{
...
...
@@ -587,18 +597,17 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
void
Compute
(
const
OpContext
*
context
,
const
Tensor
*
input
,
Tensor
*
output
)
{
Tensor
::
MappingGuard
input_mapper
(
input
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
Tensor
::
MappingGuard
output_map
(
output
);
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
memset
(
static_cast
<
void
*>
(
output_ptr
),
0
,
output
->
size
()
*
sizeof
(
T
));
switch
(
data_reshape_
.
size
())
{
case
1
:
Reduce1Dims
(
context
,
input
_ptr
,
reduce_type_
,
output_ptr
);
case
1
:
Reduce1Dims
(
context
,
input
,
reduce_type_
,
output
);
break
;
case
2
:
Reduce2Dims
(
context
,
input
_ptr
,
reduce_type_
,
output_ptr
);
case
2
:
Reduce2Dims
(
context
,
input
,
reduce_type_
,
output
);
break
;
case
3
:
Reduce3Dims
(
context
,
input
_ptr
,
reduce_type_
,
output_ptr
);
case
3
:
Reduce3Dims
(
context
,
input
,
reduce_type_
,
output
);
break
;
case
4
:
Reduce4Dims
(
context
,
input
_ptr
,
reduce_type_
,
output_ptr
);
case
4
:
Reduce4Dims
(
context
,
input
,
reduce_type_
,
output
);
break
;
default:
MACE_CHECK
(
false
,
"not implemented in mace"
)
<<
"data reshape size"
<<
data_reshape_
.
size
()
...
...
@@ -617,8 +626,10 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
template
<
>
void
ReduceOp
<
DeviceType
::
CPU
,
uint8_t
>::
Reduce1Dims
(
const
OpContext
*
context
,
const
uint8_t
*
input
,
ReduceType
type
,
uint8_t
*
output
)
{
const
Tensor
*
input_tensor
,
ReduceType
type
,
Tensor
*
output_tensor
)
{
MACE_UNUSED
(
context
);
const
uint8_t
*
input
=
input_tensor
->
data
<
uint8_t
>
();
uint8_t
*
output
=
output_tensor
->
mutable_data
<
uint8_t
>
();
if
(
reduce_first_axis_
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
uint32_t
tmp
=
0
;
...
...
@@ -640,11 +651,15 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce1Dims(
}
output
[
0
]
=
tmp
;
}
else
if
(
type
==
ReduceType
::
SUM
)
{
uint32_t
tmp
=
0
;
int32_t
sum
=
0
;
const
auto
in_zero_point
=
input_tensor
->
zero_point
();
const
auto
out_zero_point
=
output_tensor
->
zero_point
();
const
auto
scale
=
input_tensor
->
scale
()
/
output_tensor
->
scale
();
for
(
int
i
=
0
;
i
<
data_reshape_
[
0
];
++
i
)
{
tmp
=
tmp
+
input
[
i
];
sum
=
sum
+
input
[
i
];
}
output
[
0
]
=
static_cast
<
uint8_t
>
(
tmp
+
data_reshape_
[
0
]
/
2
);
const
float
f
=
(
sum
-
in_zero_point
*
data_reshape_
[
0
])
*
scale
;
output
[
0
]
=
Saturate
<
uint8_t
>
(
std
::
roundf
(
f
+
out_zero_point
));
}
else
{
MACE_NOT_IMPLEMENTED
;
}
...
...
@@ -656,10 +671,12 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce1Dims(
template
<
>
void
ReduceOp
<
DeviceType
::
CPU
,
uint8_t
>::
Reduce2Dims
(
const
OpContext
*
context
,
const
uint8_t
*
input
,
ReduceType
type
,
uint8_t
*
output
)
{
const
Tensor
*
input_tensor
,
ReduceType
type
,
Tensor
*
output_tensor
)
{
utils
::
ThreadPool
&
thread_pool
=
context
->
device
()
->
cpu_runtime
()
->
thread_pool
();
const
uint8_t
*
input
=
input_tensor
->
data
<
uint8_t
>
();
uint8_t
*
output
=
output_tensor
->
mutable_data
<
uint8_t
>
();
if
(
reduce_first_axis_
)
{
thread_pool
.
Compute1D
([
=
](
index_t
start
,
index_t
end
,
index_t
step
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
...
...
@@ -687,13 +704,17 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce2Dims(
}
output
[
i
]
=
tmp
;
}
}
else
if
(
type
==
ReduceType
::
SUM
)
{
}
else
if
(
type
==
ReduceType
::
SUM
)
{
const
auto
in_zero_point
=
input_tensor
->
zero_point
();
const
auto
out_zero_point
=
output_tensor
->
zero_point
();
const
auto
scale
=
input_tensor
->
scale
()
/
output_tensor
->
scale
();
for
(
index_t
i
=
start
;
i
<
end
;
i
+=
step
)
{
uint32_t
tmp
=
0
;
int32_t
sum
=
0
;
for
(
int
j
=
0
;
j
<
data_reshape_
[
0
];
++
j
)
{
tmp
+=
input
[
j
*
data_reshape_
[
1
]
+
i
];
sum
+=
input
[
j
*
data_reshape_
[
1
]
+
i
];
}
output
[
i
]
=
static_cast
<
uint8_t
>
(
tmp
+
data_reshape_
[
0
]
/
2
);
const
float
f
=
(
sum
-
in_zero_point
*
data_reshape_
[
0
])
*
scale
;
output
[
i
]
=
Saturate
<
uint8_t
>
(
std
::
roundf
(
f
+
out_zero_point
));
}
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
@@ -727,12 +748,16 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce2Dims(
output
[
i
]
=
tmp
;
}
}
else
if
(
type
==
ReduceType
::
SUM
)
{
const
auto
in_zero_point
=
input_tensor
->
zero_point
();
const
auto
out_zero_point
=
output_tensor
->
zero_point
();
const
auto
scale
=
input_tensor
->
scale
()
/
output_tensor
->
scale
();
for
(
index_t
i
=
start
;
i
<
end
;
i
+=
step
)
{
uint32_t
tmp
=
0
;
int32_t
sum
=
0
;
for
(
int
j
=
0
;
j
<
data_reshape_
[
1
];
++
j
)
{
tmp
+=
input
[
i
*
data_reshape_
[
1
]
+
j
];
sum
+=
input
[
i
*
data_reshape_
[
1
]
+
j
];
}
output
[
i
]
=
static_cast
<
uint8_t
>
(
tmp
+
data_reshape_
[
1
]
/
2
);
const
float
f
=
(
sum
-
in_zero_point
*
data_reshape_
[
1
])
*
scale
;
output
[
i
]
=
Saturate
<
uint8_t
>
(
std
::
roundf
(
f
+
out_zero_point
));
}
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
@@ -744,10 +769,12 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce2Dims(
template
<
>
void
ReduceOp
<
DeviceType
::
CPU
,
uint8_t
>::
Reduce3Dims
(
const
OpContext
*
context
,
const
uint8_t
*
input
,
ReduceType
type
,
uint8_t
*
output
)
{
const
Tensor
*
input_tensor
,
ReduceType
type
,
Tensor
*
output_tensor
)
{
utils
::
ThreadPool
&
thread_pool
=
context
->
device
()
->
cpu_runtime
()
->
thread_pool
();
const
uint8_t
*
input
=
input_tensor
->
data
<
uint8_t
>
();
uint8_t
*
output
=
output_tensor
->
mutable_data
<
uint8_t
>
();
if
(
reduce_first_axis_
)
{
thread_pool
.
Compute1D
([
=
](
index_t
start
,
index_t
end
,
index_t
step
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
...
...
@@ -787,15 +814,19 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce3Dims(
output
[
i
]
=
tmp
;
}
}
else
if
(
type
==
ReduceType
::
SUM
)
{
const
auto
in_zero_point
=
input_tensor
->
zero_point
();
const
auto
out_zero_point
=
output_tensor
->
zero_point
();
const
auto
scale
=
input_tensor
->
scale
()
/
output_tensor
->
scale
();
for
(
index_t
i
=
start
;
i
<
end
;
i
+=
step
)
{
uint32_t
tmp
=
0
;
int32_t
sum
=
0
;
for
(
int
j
=
0
;
j
<
data_reshape_
[
2
];
++
j
)
{
for
(
int
k
=
0
;
k
<
data_reshape_
[
0
];
++
k
)
{
tmp
+=
input
[(
k
*
data_reshape_
[
1
]
+
i
)
*
data_reshape_
[
2
]
+
j
];
sum
+=
input
[(
k
*
data_reshape_
[
1
]
+
i
)
*
data_reshape_
[
2
]
+
j
];
}
}
index_t
dim
=
data_reshape_
[
0
]
*
data_reshape_
[
2
];
output
[
i
]
=
static_cast
<
uint8_t
>
(
tmp
+
dim
/
2
);
const
auto
count
=
data_reshape_
[
2
]
*
data_reshape_
[
0
];
const
float
f
=
(
sum
-
in_zero_point
*
count
)
*
scale
;
output
[
i
]
=
Saturate
<
uint8_t
>
(
std
::
roundf
(
f
+
out_zero_point
));
}
}
else
{
MACE_NOT_IMPLEMENTED
;
...
...
@@ -841,14 +872,18 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce3Dims(
}
}
}
else
if
(
type
==
ReduceType
::
SUM
)
{
const
auto
in_zero_point
=
input_tensor
->
zero_point
();
const
auto
out_zero_point
=
output_tensor
->
zero_point
();
const
auto
scale
=
input_tensor
->
scale
()
/
output_tensor
->
scale
();
for
(
index_t
i
=
start0
;
i
<
end0
;
i
+=
step0
)
{
for
(
index_t
j
=
start1
;
j
<
end1
;
j
+=
step1
)
{
uint32_t
tmp
=
0
;
int32_t
sum
=
0
;
for
(
int
k
=
0
;
k
<
data_reshape_
[
1
];
++
k
)
{
tmp
+=
input
[(
i
*
data_reshape_
[
1
]
+
k
)
*
data_reshape_
[
2
]
+
j
];
sum
+=
input
[(
i
*
data_reshape_
[
1
]
+
k
)
*
data_reshape_
[
2
]
+
j
];
}
const
float
f
=
(
sum
-
in_zero_point
*
data_reshape_
[
1
])
*
scale
;
output
[
i
*
data_reshape_
[
2
]
+
j
]
=
static_cast
<
uint8_t
>
(
tmp
+
data_reshape_
[
1
]
/
2
);
Saturate
<
uint8_t
>
(
std
::
roundf
(
f
+
out_zero_point
)
);
}
}
}
else
{
...
...
@@ -861,10 +896,12 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce3Dims(
template
<
>
void
ReduceOp
<
DeviceType
::
CPU
,
uint8_t
>::
Reduce4Dims
(
const
OpContext
*
context
,
const
uint8_t
*
input
,
ReduceType
type
,
uint8_t
*
output
)
{
const
Tensor
*
input_tensor
,
ReduceType
type
,
Tensor
*
output_tensor
)
{
utils
::
ThreadPool
&
thread_pool
=
context
->
device
()
->
cpu_runtime
()
->
thread_pool
();
const
uint8_t
*
input
=
input_tensor
->
data
<
uint8_t
>
();
uint8_t
*
output
=
output_tensor
->
mutable_data
<
uint8_t
>
();
if
(
reduce_first_axis_
)
{
thread_pool
.
Compute2D
([
=
](
index_t
start0
,
index_t
end0
,
index_t
step0
,
index_t
start1
,
index_t
end1
,
index_t
step1
)
{
...
...
@@ -914,18 +951,22 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce4Dims(
}
}
}
else
if
(
type
==
ReduceType
::
SUM
)
{
const
auto
in_zero_point
=
input_tensor
->
zero_point
();
const
auto
out_zero_point
=
output_tensor
->
zero_point
();
const
auto
scale
=
input_tensor
->
scale
()
/
output_tensor
->
scale
();
for
(
index_t
i
=
start0
;
i
<
end0
;
i
+=
step0
)
{
for
(
index_t
j
=
start1
;
j
<
end1
;
j
+=
step1
)
{
uint32_t
tmp
=
0
;
int32_t
sum
=
0
;
for
(
int
k
=
0
;
k
<
data_reshape_
[
2
];
++
k
)
{
for
(
int
t
=
0
;
t
<
data_reshape_
[
0
];
++
t
)
{
tmp
+=
input
[((
t
*
data_reshape_
[
1
]
+
i
)
*
sum
+=
input
[((
t
*
data_reshape_
[
1
]
+
i
)
*
data_reshape_
[
2
]
+
k
)
*
data_reshape_
[
3
]
+
j
];
}
}
index_t
dim
=
data_reshape_
[
0
]
*
data_reshape_
[
2
];
const
auto
count
=
data_reshape_
[
2
]
*
data_reshape_
[
0
];
const
float
f
=
(
sum
-
in_zero_point
*
count
)
*
scale
;
output
[
i
*
data_reshape_
[
3
]
+
j
]
=
static_cast
<
uint8_t
>
(
tmp
+
dim
/
2
);
Saturate
<
uint8_t
>
(
std
::
roundf
(
f
+
out_zero_point
)
);
}
}
}
else
{
...
...
@@ -983,18 +1024,22 @@ void ReduceOp<DeviceType::CPU, uint8_t>::Reduce4Dims(
}
}
}
else
if
(
type
==
ReduceType
::
SUM
)
{
const
auto
in_zero_point
=
input_tensor
->
zero_point
();
const
auto
out_zero_point
=
output_tensor
->
zero_point
();
const
auto
scale
=
input_tensor
->
scale
()
/
output_tensor
->
scale
();
for
(
index_t
i
=
start0
;
i
<
end0
;
i
+=
step0
)
{
for
(
index_t
j
=
start1
;
j
<
end1
;
j
+=
step1
)
{
uint32_t
tmp
=
0
;
int32_t
sum
=
0
;
for
(
int
k
=
0
;
k
<
data_reshape_
[
1
];
++
k
)
{
for
(
int
t
=
0
;
t
<
data_reshape_
[
3
];
++
t
)
{
tmp
+=
input
[((
i
*
data_reshape_
[
1
]
+
k
)
*
sum
+=
input
[((
i
*
data_reshape_
[
1
]
+
k
)
*
data_reshape_
[
2
]
+
j
)
*
data_reshape_
[
3
]
+
t
];
}
}
index_t
dim
=
data_reshape_
[
1
]
*
data_reshape_
[
3
];
const
auto
count
=
data_reshape_
[
1
]
*
data_reshape_
[
3
];
const
float
f
=
(
sum
-
in_zero_point
*
count
)
*
scale
;
output
[
i
*
data_reshape_
[
2
]
+
j
]
=
static_cast
<
uint8_t
>
(
tmp
+
dim
/
2
);
Saturate
<
uint8_t
>
(
std
::
roundf
(
f
+
out_zero_point
)
);
}
}
}
else
{
...
...
test/ccunit/mace/ops/reduce_test.cc
浏览文件 @
1bb11f64
...
...
@@ -372,6 +372,15 @@ void TestQuant(const std::vector<index_t> &input_shape,
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
DataFormat
::
NCHW
,
"Output"
,
DataFormat
::
NHWC
);
OpDefBuilder
(
"Quantize"
,
"DoQuantizeOutput"
)
.
Input
(
"Output"
)
.
Output
(
"ExpectedQuantizedOutput"
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
AddIntArg
(
"non_zero"
,
true
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"Quantize"
,
"QuantizeInput"
)
.
Input
(
"Input"
)
.
Output
(
"QuantizedInput"
)
...
...
@@ -392,7 +401,12 @@ void TestQuant(const std::vector<index_t> &input_shape,
.
AddIntArg
(
"has_data_format"
,
1
)
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
net
.
Setup
(
DeviceType
::
CPU
);
Tensor
*
expect_quantize_output
=
net
.
GetTensor
(
"ExpectedQuantizedOutput"
);
Tensor
*
quantize_output
=
net
.
GetTensor
(
"QuantizedOutput"
);
quantize_output
->
SetScale
(
expect_quantize_output
->
scale
());
quantize_output
->
SetZeroPoint
(
expect_quantize_output
->
zero_point
());
net
.
Run
();
OpDefBuilder
(
"Dequantize"
,
"DeQuantizeTest"
)
.
Input
(
"QuantizedOutput"
)
...
...
@@ -406,7 +420,7 @@ void TestQuant(const std::vector<index_t> &input_shape,
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
};
for
(
ReduceType
type
:
{
MEAN
,
MIN
,
MAX
})
{
for
(
ReduceType
type
:
{
MEAN
,
MIN
,
MAX
,
SUM
})
{
func
(
type
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录