Commit 21b43e2b authored by 李寅

Merge branch 'compress' into 'master'

Dequantize weights to half

See merge request !1115
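In short: the QuantizeUtil template gains a second parameter, going from QuantizeUtil&lt;Q&gt; (quantized storage type) to QuantizeUtil&lt;F, Q&gt; (float output type, quantized storage type); a FloatCast&lt;T&gt; helper is added to mace/core/types.h; and Workspace::LoadModelTensor now dequantizes compressed uint8 weights directly to half when the destination data type is not DT_FLOAT (e.g. GPU nets running in half precision). Below is a minimal sketch of the core idea with the MACE class machinery stripped out; the standalone DequantizeSketch wrapper, the serial loop, and the half.hpp include path are illustrative assumptions, not the library's API.

```cpp
#include <cstddef>
#include <cstdint>

#include "half.hpp"  // half_float::half (bundled with MACE; exact path assumed)

using half = half_float::half;

// Mirrors the FloatCast helper this MR adds to mace/core/types.h:
// identity for float, narrowing cast for half.
template <typename T>
inline T FloatCast(float data) {
  return data;
}

template <>
inline half FloatCast(float data) {
  return half_float::half_cast<half>(data);
}

// Mirrors QuantizeUtil<F, Q>::Dequantize, minus the ThreadPool parallelism:
// Q is the quantized storage type, F the float output type (float or half).
template <typename F, typename Q>
void DequantizeSketch(const Q *input, std::size_t size, float scale,
                      int32_t zero_point, F *output) {
  for (std::size_t i = 0; i < size; ++i) {
    output[i] = FloatCast<F>(scale * (input[i] - zero_point));
  }
}
```

With this in place, uncompressing quantized weights to fp16 is just DequantizeSketch&lt;half, uint8_t&gt;(...), while the existing fp32 path stays DequantizeSketch&lt;float, uint8_t&gt;(...).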
......@@ -23,7 +23,7 @@ namespace mace {
#ifdef MACE_ENABLE_NEON
template<>
-void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
+void QuantizeUtil<float, uint8_t>::QuantizeWithScaleAndZeropoint(
const float *input,
const index_t size,
float scale,
......@@ -65,11 +65,11 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
}
template<>
-void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
-const index_t size,
-const float scale,
-const int32_t zero_point,
-float *output) {
+void QuantizeUtil<float, uint8_t>::Dequantize(const uint8_t *input,
+const index_t size,
+const float scale,
+const int32_t zero_point,
+float *output) {
const index_t block_count = size / 16;
const int32x4_t vzero = vdupq_n_s32(zero_point);
const float32x4_t vscale = vdupq_n_f32(scale);
......@@ -104,11 +104,11 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
}
template<>
-void QuantizeUtil<int32_t>::Dequantize(const int *input,
-const index_t size,
-const float scale,
-const int32_t zero_point,
-float *output) {
+void QuantizeUtil<float, int32_t>::Dequantize(const int *input,
+const index_t size,
+const float scale,
+const int32_t zero_point,
+float *output) {
const index_t block_count = size / 4;
const int32x4_t vzero = vdupq_n_s32(zero_point);
const float32x4_t vscale = vdupq_n_f32(scale);
......
......@@ -25,7 +25,7 @@
namespace mace {
-template<typename T>
+template<typename Q>
inline void AdjustRange(const float in_min_data,
const float in_max_data,
const bool non_zero,
......@@ -33,8 +33,8 @@ inline void AdjustRange(const float in_min_data,
int32_t *zero_point) {
// re-range to make range include zero float and
// make zero float as integer u8
-const T quantized_min = std::numeric_limits<T>::lowest();
-const T quantized_max = std::numeric_limits<T>::max();
+const Q quantized_min = std::numeric_limits<Q>::lowest();
+const Q quantized_max = std::numeric_limits<Q>::max();
if (quantized_min < 0) {
MACE_ASSERT(!non_zero, "Cannot nudge to non_zero quantize value.");
}
......@@ -65,15 +65,15 @@ inline void AdjustRange(const float in_min_data,
}
}
-template<typename T>
-inline T Saturate(float value) {
+template<typename Q>
+inline Q Saturate(float value) {
int rounded_value = static_cast<int>(value);
-if (rounded_value <= std::numeric_limits<T>::lowest()) {
-return std::numeric_limits<T>::lowest();
-} else if (rounded_value >= std::numeric_limits<T>::max()) {
-return std::numeric_limits<T>::max();
+if (rounded_value <= std::numeric_limits<Q>::lowest()) {
+return std::numeric_limits<Q>::lowest();
+} else if (rounded_value >= std::numeric_limits<Q>::max()) {
+return std::numeric_limits<Q>::max();
} else {
-return static_cast<T>(rounded_value);
+return static_cast<Q>(rounded_value);
}
}
......@@ -115,7 +115,7 @@ inline void GetOutputMultiplierAndShift(
MACE_CHECK(*right_shift >= 0);
}
-template<typename T>
+template<typename F, typename Q>
class QuantizeUtil {
public:
explicit QuantizeUtil(utils::ThreadPool *thread_pool)
......@@ -125,11 +125,11 @@ class QuantizeUtil {
const index_t size,
float scale,
int32_t zero_point,
-T *output) {
+Q *output) {
float recip_scale = 1 / scale;
thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) {
-output[i] = Saturate<T>(roundf(zero_point + recip_scale * input[i]));
+output[i] = Saturate<Q>(roundf(zero_point + recip_scale * input[i]));
}
}, 0, size, 1);
}
......@@ -137,14 +137,14 @@ class QuantizeUtil {
void Quantize(const float *input,
const index_t size,
bool non_zero,
-T *output,
+Q *output,
float *scale,
int32_t *zero_point) {
float in_min_data;
float in_max_data;
FindMinMax(input, size, &in_min_data, &in_max_data);
-AdjustRange<T>(in_min_data, in_max_data, non_zero,
+AdjustRange<Q>(in_min_data, in_max_data, non_zero,
scale, zero_point);
QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output);
......@@ -158,24 +158,24 @@ class QuantizeUtil {
Tensor::MappingGuard input_guard(&input);
Tensor::MappingGuard output_guard(output);
auto *input_data = input.data<float>();
-auto *output_data = output->mutable_data<T>();
+auto *output_data = output->mutable_data<Q>();
float scale;
int32_t zero_point;
Quantize(input_data, input.size(), false, output_data, &scale, &zero_point);
-*min_out = scale * (std::numeric_limits<T>::lowest() - zero_point);
-*max_out = scale * (std::numeric_limits<T>::max() - zero_point);
+*min_out = scale * (std::numeric_limits<Q>::lowest() - zero_point);
+*max_out = scale * (std::numeric_limits<Q>::max() - zero_point);
}
-void Dequantize(const T *input,
+void Dequantize(const Q *input,
const index_t size,
const float scale,
const int32_t zero_point,
-float *output) {
+F *output) {
thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) {
-output[i] = scale * (input[i] - zero_point);
+output[i] = FloatCast<F>(scale * (input[i] - zero_point));
}
}, 0, size, 1);
}
......@@ -187,12 +187,12 @@ class QuantizeUtil {
MACE_CHECK(input.size() != 0);
Tensor::MappingGuard input_guard(&input);
Tensor::MappingGuard output_guard(output);
-auto *input_data = input.data<T>();
-auto *output_data = output->mutable_data<float>();
+auto *input_data = input.data<Q>();
+auto *output_data = output->mutable_data<F>();
float scale;
int32_t zero_point;
-AdjustRange<T>(min_in, max_in, false, &scale, &zero_point);
+AdjustRange<Q>(min_in, max_in, false, &scale, &zero_point);
Dequantize(input_data, input.size(), scale, zero_point, output_data);
}
......@@ -204,7 +204,7 @@ class QuantizeUtil {
#ifdef MACE_ENABLE_NEON
template<>
-void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
+void QuantizeUtil<float, uint8_t>::QuantizeWithScaleAndZeropoint(
const float *input,
const index_t size,
float scale,
......@@ -212,18 +212,18 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
uint8_t *output);
template<>
-void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
-const index_t size,
-const float scale,
-const int32_t zero_point,
-float *output);
+void QuantizeUtil<float, uint8_t>::Dequantize(const uint8_t *input,
+const index_t size,
+const float scale,
+const int32_t zero_point,
+float *output);
template<>
-void QuantizeUtil<int32_t>::Dequantize(const int *input,
-const index_t size,
-const float scale,
-const int32_t zero_point,
-float *output);
+void QuantizeUtil<float, int32_t>::Dequantize(const int *input,
+const index_t size,
+const float scale,
+const int32_t zero_point,
+float *output);
#endif
......
......@@ -55,7 +55,7 @@ struct tensor_info {
ApuFrontend* frontend;
std::vector<tensor_info> input_infos;
std::vector<tensor_info> output_infos;
-QuantizeUtil<uint8_t> quantize_util_;
+QuantizeUtil<float, uint8_t> quantize_util_;
};
} // namespace mace
......
......@@ -50,7 +50,7 @@ class HexagonHTAWrapper : public HexagonControlWrapper {
void SetDebugLevel(int level) override;
private:
-QuantizeUtil<uint8_t> quantize_util_;
+QuantizeUtil<float, uint8_t> quantize_util_;
MACE_DISABLE_COPY_AND_ASSIGN(HexagonHTAWrapper);
};
} // namespace mace
......
......@@ -66,6 +66,16 @@ enum FrameworkType {
CAFFE = 1,
};
+template <typename T>
+inline T FloatCast(float data) {
+return data;
+}
+template <>
+inline half FloatCast(float data) {
+return half_float::half_cast<half>(data);
+}
} // namespace mace
#endif // MACE_CORE_TYPES_H_
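For reference, a hedged usage sketch of the FloatCast helper added above. It assumes mace/core/types.h exposes mace::half (the alias for half_float::half used by the specialization) alongside FloatCast; note that in the template <> specialization the argument half is deduced from the declared return type, since T does not appear in the parameter list.

```cpp
#include "mace/core/types.h"  // assumed to provide mace::FloatCast and mace::half

int main() {
  float f = mace::FloatCast<float>(0.125f);            // identity for float
  mace::half h = mace::FloatCast<mace::half>(0.125f);  // narrowed via half_float::half_cast
  return (f == static_cast<float>(h)) ? 0 : 1;         // 0.125f is exact in fp16, so exits 0
}
```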
......@@ -46,6 +46,24 @@ bool HasHalfTensor(const NetDef &net_def) {
return false;
}
+template <typename T>
+void DequantizeTensor(Device *device,
+const unsigned char *model_data,
+const ConstTensor &const_tensor,
+Tensor *output_tensor) {
+Tensor::MappingGuard guard(output_tensor);
+auto quantized_data = reinterpret_cast<const uint8_t *>(
+model_data + const_tensor.offset());
+auto dequantized_data = output_tensor->mutable_data<T>();
+QuantizeUtil<T, uint8_t>
+quantize_util(&device->cpu_runtime()->thread_pool());
+quantize_util.Dequantize(quantized_data,
+output_tensor->size(),
+const_tensor.scale(),
+const_tensor.zero_point(),
+dequantized_data);
+}
} // namespace
Workspace::Workspace() = default;
......@@ -125,10 +143,15 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
}
DataType dst_data_type = const_tensor.data_type();
-if ((device_type == DeviceType::CPU &&
-const_tensor.data_type() == DataType::DT_HALF) ||
-(!is_quantize_model && const_tensor.quantized())) {
+if (device_type == DeviceType::CPU &&
+const_tensor.data_type() == DataType::DT_HALF) {
dst_data_type = DataType::DT_FLOAT;
+} else if (!is_quantize_model && const_tensor.quantized()) {
+if (device_type == GPU && net_def.data_type() != DataType::DT_FLOAT) {
+dst_data_type = DataType::DT_HALF;
+} else {
+dst_data_type = DataType::DT_FLOAT;
+}
}
std::unique_ptr<Tensor> tensor(
......@@ -159,17 +182,17 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
}
} else if (!is_quantize_model && const_tensor.quantized()) {
// uncompress the weights of uint8
-Tensor::MappingGuard guard(tensor.get());
-auto quantized_data = reinterpret_cast<const uint8_t *>(
-model_data + const_tensor.offset());
-auto dequantized_data = tensor->mutable_data<float>();
-QuantizeUtil<uint8_t>
-quantize_util(&device->cpu_runtime()->thread_pool());
-quantize_util.Dequantize(quantized_data,
-tensor->size(),
-const_tensor.scale(),
-const_tensor.zero_point(),
-dequantized_data);
+if (dst_data_type != DT_FLOAT) {
+DequantizeTensor<half>(device,
+model_data,
+const_tensor,
+tensor.get());
+} else {
+DequantizeTensor<float>(device,
+model_data,
+const_tensor,
+tensor.get());
+}
} else {
tensor->CopyBytes(model_data + const_tensor.offset(),
const_tensor.data_size() *
......
......@@ -72,7 +72,7 @@ class QuantizeOp<DeviceType::CPU, uint8_t> : public Operation {
private:
bool non_zero_;
bool find_range_every_time_;
-QuantizeUtil<uint8_t> quantize_util_;
+QuantizeUtil<float, uint8_t> quantize_util_;
};
template<DeviceType D, class T>
......@@ -103,7 +103,7 @@ class DequantizeOp<DeviceType::CPU, T> : public Operation {
}
private:
-QuantizeUtil<T> quantize_util_;
+QuantizeUtil<float, T> quantize_util_;
};
void RegisterQuantize(OpRegistryBase *op_registry) {
......
......@@ -101,6 +101,7 @@ message NetDef {
repeated OperatorDef op = 1;
repeated Argument arg = 2;
repeated ConstTensor tensors = 3;
+optional DataType data_type = 4 [default = DT_FLOAT];
repeated InputOutputInfo input_info = 100;
repeated InputOutputInfo output_info = 101;
......
......@@ -281,6 +281,7 @@ def save_model(option, net_def, model_checksum, weight_checksum, template_dir,
obfuscate_name(option, net_def)
output_dir = output_dir + '/'
+net_def.data_type = option.data_type
# update tensor type
update_tensor_infos(net_def, option.data_type)
......
......@@ -1172,7 +1172,8 @@ void TestQuant(const index_t batch,
auto bias_data = bias->data<float>();
float bias_scale = q_input->scale() * q_filter->scale();
std::vector<int32_t> q_bias(bias->size());
-QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+QuantizeUtil<float, int32_t>
+quantize_util(OpTestContext::Get()->thread_pool());
quantize_util.QuantizeWithScaleAndZeropoint(
bias_data, bias->size(), bias_scale, 0, q_bias.data());
net.AddInputFromArray<DeviceType::CPU, int32_t>(
......
......@@ -440,7 +440,8 @@ void TestQuant(const index_t batch,
auto bias_data = bias->data<float>();
float bias_scale = q_input->scale() * q_filter->scale();
std::vector<int32_t> q_bias(bias->size());
-QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+QuantizeUtil<float, int32_t>
+quantize_util(OpTestContext::Get()->thread_pool());
quantize_util.QuantizeWithScaleAndZeropoint(
bias_data, bias->size(), bias_scale, 0, q_bias.data());
net.AddInputFromArray<DeviceType::CPU, int32_t>(
......
......@@ -267,7 +267,8 @@ void QuantRandom(const index_t batch,
float bias_scale = q_input->scale() * q_weight->scale();
std::vector<int32_t> q_bias(bias->size());
-QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+QuantizeUtil<float, int32_t>
+quantize_util(OpTestContext::Get()->thread_pool());
quantize_util.QuantizeWithScaleAndZeropoint(
bias_data, bias->size(), bias_scale, 0, q_bias.data());
net.AddInputFromArray<DeviceType::CPU, int32_t>(
......