提交 c509b1e6 编写于 作者: H hjchen2

Fix offline quantize to make sure it will not overflow

上级 4f63b086
......@@ -34,14 +34,66 @@ inline float32_t vmaxvq_f32(float32x4_t r) {
#endif
template <RoundType R>
static void Quantize(const Tensor *input, const float scale, Tensor *output) {
// Quantizes a float tensor to int8 for the offline case.
//
// Each element is first clipped to [-max_abs, max_abs], then multiplied by
// `scale` and rounded according to the RoundType template argument R.
// Clipping before scaling keeps the scaled value within
// [-max_abs * scale, max_abs * scale]; with scale = 127 / max_abs (what
// Quantize passes) that fits in int8, so the narrowing steps do not overflow.
//
//   input   - source tensor, read as float
//   scale   - multiplier applied after clipping
//   max_abs - clipping bound (symmetric around zero)
//   output  - destination tensor, written as int8_t, one value per input
inline void QuantizeOffline(const Tensor *input, const float scale,
                            const float max_abs, Tensor *output) {
  const float *x = input->data<const float>();
  int8_t *y = output->mutable_data<int8_t>();
  size_t remain = input->numel();
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
  // Vector path: 16 floats per iteration; the leftover (< 16 elements) is
  // handled by the scalar loop below.
  const size_t blocks = remain >> 4;
  remain &= 0xF;
  const float32x4_t scale_v = vdupq_n_f32(scale);
  const float32x4_t upper_v = vdupq_n_f32(max_abs);
  const float32x4_t lower_v = vdupq_n_f32(-max_abs);
#pragma omp parallel for
  for (size_t blk = 0; blk < blocks; ++blk) {
    const size_t offset = blk << 4;
    const float *src = x + offset;
    int8_t *dst = y + offset;

    float32x4_t v0 = vld1q_f32(src);
    float32x4_t v1 = vld1q_f32(src + 4);
    float32x4_t v2 = vld1q_f32(src + 8);
    float32x4_t v3 = vld1q_f32(src + 12);

    // Clip to [-max_abs, max_abs], then apply the scale.
    v0 = vmulq_f32(vmaxq_f32(vminq_f32(v0, upper_v), lower_v), scale_v);
    v1 = vmulq_f32(vmaxq_f32(vminq_f32(v1, upper_v), lower_v), scale_v);
    v2 = vmulq_f32(vmaxq_f32(vminq_f32(v2, upper_v), lower_v), scale_v);
    v3 = vmulq_f32(vmaxq_f32(vminq_f32(v3, upper_v), lower_v), scale_v);

    // Round to int32 and narrow to 16 bits.
    const int16x4_t n0 = vmovn_s32(math::vRoundq_f32<R>(v0));
    const int16x4_t n1 = vmovn_s32(math::vRoundq_f32<R>(v1));
    const int16x4_t n2 = vmovn_s32(math::vRoundq_f32<R>(v2));
    const int16x4_t n3 = vmovn_s32(math::vRoundq_f32<R>(v3));

    // Pair up the halves, narrow to 8 bits and store 2 x 8 results.
    vst1_s8(dst, vmovn_s16(vcombine_s16(n0, n1)));
    vst1_s8(dst + 8, vmovn_s16(vcombine_s16(n2, n3)));
  }
  // Skip past everything the vector loop consumed.
  x += (blocks << 4);
  y += (blocks << 4);
#endif
  // Scalar path: the whole tensor without NEON, otherwise only the tail.
  for (const float *tail_end = x + remain; x != tail_end; ++x, ++y) {
    const float clipped = std::max(std::min(*x, max_abs), -max_abs);
    *y = math::Round<R>(clipped * scale);
  }
}
template <RoundType R>
inline void QuantizeOnline(const Tensor *input, const float scale,
Tensor *output) {
const float *x = input->data<const float>();
int8_t *y = output->mutable_data<int8_t>();
size_t remain = input->numel();
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t loop = remain >> 4;
remain = remain & 0xF;
float32x4_t __scale = vdupq_n_f32(scale);
#pragma omp parallel for
for (size_t i = 0; i < loop; ++i) {
......@@ -78,6 +130,17 @@ static void Quantize(const Tensor *input, const float scale, Tensor *output) {
}
}
// Entry point for float -> int8 quantization with rounding mode R.
//
// Derives the int8 scale factor (127 / max_abs) and forwards to the offline
// or online implementation. The offline variant additionally receives
// max_abs so it can clip the input before scaling.
//
//   input   - source tensor of floats
//   max_abs - magnitude mapped to 127; used as the divisor, so callers must
//             pass a non-zero value
//   offline - true selects QuantizeOffline, false selects QuantizeOnline
//   output  - destination tensor for the int8 results
template <RoundType R>
static void Quantize(const Tensor *input, const float max_abs,
                     const bool offline, Tensor *output) {
  const float scale = 127.f / max_abs;
  if (!offline) {
    QuantizeOnline<R>(input, scale, output);
    return;
  }
  QuantizeOffline<R>(input, scale, max_abs, output);
}
float find_abs_max(const Tensor *input) {
float max_abs = 0.f;
const float *x = input->data<const float>();
......@@ -133,18 +196,17 @@ void QuantizeKernel<CPU, float>::Compute(const QuantizeParam<CPU> &param) {
max_abs = find_abs_max(input);
}
max_abs = std::max(max_abs, 1e-6f);
// only support int8 currently
float scale = 127 / max_abs;
param.online_scale_->mutable_data<float>()[0] = max_abs;
switch (param.round_type_) {
case ROUND_NEAREST_TO_EVEN:
Quantize<ROUND_NEAREST_TO_EVEN>(input, scale, output);
Quantize<ROUND_NEAREST_TO_EVEN>(input, max_abs, param.offline_, output);
break;
case ROUND_NEAREST_TOWARDS_ZERO:
Quantize<ROUND_NEAREST_TOWARDS_ZERO>(input, scale, output);
Quantize<ROUND_NEAREST_TOWARDS_ZERO>(input, max_abs, param.offline_,
output);
break;
case ROUND_NEAREST_AWAY_ZERO:
Quantize<ROUND_NEAREST_AWAY_ZERO>(input, scale, output);
Quantize<ROUND_NEAREST_AWAY_ZERO>(input, max_abs, param.offline_, output);
break;
default:
LOG(kLOG_ERROR) << "round type is not supported.";
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册