Commit 23bd28c8 authored by Bin Li

Dequantize weights to half

Parent 23d985f7
@@ -23,7 +23,7 @@ namespace mace {
 #ifdef MACE_ENABLE_NEON
 template<>
-void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
+void QuantizeUtil<float, uint8_t>::QuantizeWithScaleAndZeropoint(
     const float *input,
     const index_t size,
     float scale,
@@ -65,11 +65,11 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
 }
 template<>
-void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
+void QuantizeUtil<float, uint8_t>::Dequantize(const uint8_t *input,
                                        const index_t size,
                                        const float scale,
                                        const int32_t zero_point,
                                        float *output) {
   const index_t block_count = size / 16;
   const int32x4_t vzero = vdupq_n_s32(zero_point);
   const float32x4_t vscale = vdupq_n_f32(scale);
@@ -104,11 +104,11 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
 }
 template<>
-void QuantizeUtil<int32_t>::Dequantize(const int *input,
+void QuantizeUtil<float, int32_t>::Dequantize(const int *input,
                                        const index_t size,
                                        const float scale,
                                        const int32_t zero_point,
                                        float *output) {
   const index_t block_count = size / 4;
   const int32x4_t vzero = vdupq_n_s32(zero_point);
   const float32x4_t vscale = vdupq_n_f32(scale);
...
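Note on the hunks above: the explicit specializations are now spelled QuantizeUtil<float, uint8_t> and QuantizeUtil<float, int32_t>, so only the float-output paths keep the NEON fast path; a half-output instantiation falls through to the generic scalar loop in quantize.h. For readers without the full file, here is a self-contained sketch of what one 16-element block of the uint8 Dequantize specialization computes (an illustrative helper, not the exact library body):

    #include <arm_neon.h>
    #include <cstdint>

    // Dequantize 16 uint8 values: widen u8 -> u16 -> s32, subtract the
    // zero point, convert to float, and multiply by the scale.
    void DequantizeBlock16(const uint8_t *input, int32_t zero_point,
                           float scale, float *output) {
      const int32x4_t vzero = vdupq_n_s32(zero_point);
      const float32x4_t vscale = vdupq_n_f32(scale);
      const uint8x16_t v = vld1q_u8(input);
      const uint16x8_t v_lo = vmovl_u8(vget_low_u8(v));
      const uint16x8_t v_hi = vmovl_u8(vget_high_u8(v));
      const int32x4_t v0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_lo)));
      const int32x4_t v1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_lo)));
      const int32x4_t v2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_hi)));
      const int32x4_t v3 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_hi)));
      vst1q_f32(output + 0,  vmulq_f32(vscale, vcvtq_f32_s32(vsubq_s32(v0, vzero))));
      vst1q_f32(output + 4,  vmulq_f32(vscale, vcvtq_f32_s32(vsubq_s32(v1, vzero))));
      vst1q_f32(output + 8,  vmulq_f32(vscale, vcvtq_f32_s32(vsubq_s32(v2, vzero))));
      vst1q_f32(output + 12, vmulq_f32(vscale, vcvtq_f32_s32(vsubq_s32(v3, vzero))));
    }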
@@ -25,7 +25,7 @@
 namespace mace {
-template<typename T>
+template<typename Q>
 inline void AdjustRange(const float in_min_data,
                         const float in_max_data,
                         const bool non_zero,
@@ -33,8 +33,8 @@ inline void AdjustRange(const float in_min_data,
                         int32_t *zero_point) {
   // re-range to make range include zero float and
   // make zero float as integer u8
-  const T quantized_min = std::numeric_limits<T>::lowest();
-  const T quantized_max = std::numeric_limits<T>::max();
+  const Q quantized_min = std::numeric_limits<Q>::lowest();
+  const Q quantized_max = std::numeric_limits<Q>::max();
   if (quantized_min < 0) {
     MACE_ASSERT(!non_zero, "Cannot nudge to non_zero quantize value.");
   }
@@ -65,15 +65,15 @@ inline void AdjustRange(const float in_min_data,
   }
 }
-template<typename T>
-inline T Saturate(float value) {
+template<typename Q>
+inline Q Saturate(float value) {
   int rounded_value = static_cast<int>(value);
-  if (rounded_value <= std::numeric_limits<T>::lowest()) {
-    return std::numeric_limits<T>::lowest();
-  } else if (rounded_value >= std::numeric_limits<T>::max()) {
-    return std::numeric_limits<T>::max();
+  if (rounded_value <= std::numeric_limits<Q>::lowest()) {
+    return std::numeric_limits<Q>::lowest();
+  } else if (rounded_value >= std::numeric_limits<Q>::max()) {
+    return std::numeric_limits<Q>::max();
   } else {
-    return static_cast<T>(rounded_value);
+    return static_cast<Q>(rounded_value);
   }
 }
@@ -115,7 +115,7 @@ inline void GetOutputMultiplierAndShift(
   MACE_CHECK(*right_shift >= 0);
 }
-template<typename T>
+template<typename F, typename Q>
 class QuantizeUtil {
  public:
   explicit QuantizeUtil(utils::ThreadPool *thread_pool)
@@ -125,11 +125,11 @@ class QuantizeUtil {
       const index_t size,
       float scale,
       int32_t zero_point,
-      T *output) {
+      Q *output) {
     float recip_scale = 1 / scale;
     thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) {
       for (index_t i = start; i < end; i += step) {
-        output[i] = Saturate<T>(roundf(zero_point + recip_scale * input[i]));
+        output[i] = Saturate<Q>(roundf(zero_point + recip_scale * input[i]));
       }
     }, 0, size, 1);
   }
@@ -137,14 +137,14 @@ class QuantizeUtil {
   void Quantize(const float *input,
                 const index_t size,
                 bool non_zero,
-                T *output,
+                Q *output,
                 float *scale,
                 int32_t *zero_point) {
     float in_min_data;
     float in_max_data;
     FindMinMax(input, size, &in_min_data, &in_max_data);
-    AdjustRange<T>(in_min_data, in_max_data, non_zero,
+    AdjustRange<Q>(in_min_data, in_max_data, non_zero,
                    scale, zero_point);
     QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output);
@@ -158,24 +158,24 @@ class QuantizeUtil {
     Tensor::MappingGuard input_guard(&input);
     Tensor::MappingGuard output_guard(output);
     auto *input_data = input.data<float>();
-    auto *output_data = output->mutable_data<T>();
+    auto *output_data = output->mutable_data<Q>();
     float scale;
     int32_t zero_point;
     Quantize(input_data, input.size(), false, output_data, &scale, &zero_point);
-    *min_out = scale * (std::numeric_limits<T>::lowest() - zero_point);
-    *max_out = scale * (std::numeric_limits<T>::max() - zero_point);
+    *min_out = scale * (std::numeric_limits<Q>::lowest() - zero_point);
+    *max_out = scale * (std::numeric_limits<Q>::max() - zero_point);
   }
-  void Dequantize(const T *input,
+  void Dequantize(const Q *input,
                   const index_t size,
                   const float scale,
                   const int32_t zero_point,
-                  float *output) {
+                  F *output) {
     thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) {
       for (index_t i = start; i < end; i += step) {
-        output[i] = scale * (input[i] - zero_point);
+        output[i] = FloatCast<F>(scale * (input[i] - zero_point));
       }
     }, 0, size, 1);
   }
@@ -187,12 +187,12 @@ class QuantizeUtil {
     MACE_CHECK(input.size() != 0);
     Tensor::MappingGuard input_guard(&input);
     Tensor::MappingGuard output_guard(output);
-    auto *input_data = input.data<T>();
-    auto *output_data = output->mutable_data<float>();
+    auto *input_data = input.data<Q>();
+    auto *output_data = output->mutable_data<F>();
     float scale;
     int32_t zero_point;
-    AdjustRange<T>(min_in, max_in, false, &scale, &zero_point);
+    AdjustRange<Q>(min_in, max_in, false, &scale, &zero_point);
     Dequantize(input_data, input.size(), scale, zero_point, output_data);
   }
@@ -204,7 +204,7 @@ class QuantizeUtil {
 #ifdef MACE_ENABLE_NEON
 template<>
-void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
+void QuantizeUtil<float, uint8_t>::QuantizeWithScaleAndZeropoint(
     const float *input,
     const index_t size,
     float scale,
@@ -212,18 +212,18 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
     uint8_t *output);
 template<>
-void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
+void QuantizeUtil<float, uint8_t>::Dequantize(const uint8_t *input,
                                        const index_t size,
                                        const float scale,
                                        const int32_t zero_point,
                                        float *output);
 template<>
-void QuantizeUtil<int32_t>::Dequantize(const int *input,
+void QuantizeUtil<float, int32_t>::Dequantize(const int *input,
                                        const index_t size,
                                        const float scale,
                                        const int32_t zero_point,
                                        float *output);
 #endif
...
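The class template now takes two parameters: F, the dequantized output type (float or half), and Q, the quantized storage type. A minimal caller sketch under that reading; the function name and variables here are illustrative, not part of the patch:

    #include "mace/core/quantize.h"

    // Dequantize uint8 weights straight into a half buffer; the generic
    // Dequantize loop routes each value through FloatCast<half>.
    void DequantizeWeightsToHalf(utils::ThreadPool *thread_pool,
                                 const uint8_t *q_weights, index_t count,
                                 float scale, int32_t zero_point,
                                 half *weights_out) {
      QuantizeUtil<half, uint8_t> util(thread_pool);
      util.Dequantize(q_weights, count, scale, zero_point, weights_out);
    }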
@@ -55,7 +55,7 @@ struct tensor_info {
   ApuFrontend* frontend;
   std::vector<tensor_info> input_infos;
   std::vector<tensor_info> output_infos;
-  QuantizeUtil<uint8_t> quantize_util_;
+  QuantizeUtil<float, uint8_t> quantize_util_;
 };
 }  // namespace mace
...
@@ -50,7 +50,7 @@ class HexagonHTAWrapper : public HexagonControlWrapper {
   void SetDebugLevel(int level) override;
  private:
-  QuantizeUtil<uint8_t> quantize_util_;
+  QuantizeUtil<float, uint8_t> quantize_util_;
   MACE_DISABLE_COPY_AND_ASSIGN(HexagonHTAWrapper);
 };
 }  // namespace mace
...
@@ -66,6 +66,16 @@ enum FrameworkType {
   CAFFE = 1,
 };
+template <typename T>
+inline T FloatCast(float data) {
+  return data;
+}
+
+template <>
+inline half FloatCast(float data) {
+  return half_float::half_cast<half>(data);
+}
+
 }  // namespace mace
 #endif  // MACE_CORE_TYPES_H_
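FloatCast<T> is an identity for float and a rounding conversion for half, which is what lets the generic Dequantize loop in quantize.h write either output type. A quick illustration of the precision involved, assuming the bundled half_float library:

    float f = 0.1f;
    half h = FloatCast<half>(f);         // same as half_float::half_cast<half>(0.1f)
    float back = static_cast<float>(h);  // ~0.0999756f: half keeps ~11 significant bits

Dequantized uint8 weights span at most 256 distinct values per tensor, so this loss is usually negligible next to the quantization error itself.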
@@ -46,6 +46,24 @@ bool HasHalfTensor(const NetDef &net_def) {
   return false;
 }
+template <typename T>
+void DequantizeTensor(Device *device,
+                      const unsigned char *model_data,
+                      const ConstTensor &const_tensor,
+                      Tensor *output_tensor) {
+  Tensor::MappingGuard guard(output_tensor);
+  auto quantized_data = reinterpret_cast<const uint8_t *>(
+      model_data + const_tensor.offset());
+  auto dequantized_data = output_tensor->mutable_data<T>();
+  QuantizeUtil<T, uint8_t>
+      quantize_util(&device->cpu_runtime()->thread_pool());
+  quantize_util.Dequantize(quantized_data,
+                           output_tensor->size(),
+                           const_tensor.scale(),
+                           const_tensor.zero_point(),
+                           dequantized_data);
+}
+
 }  // namespace
 Workspace::Workspace() = default;
@@ -125,10 +143,15 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
     }
     DataType dst_data_type = const_tensor.data_type();
-    if ((device_type == DeviceType::CPU &&
-         const_tensor.data_type() == DataType::DT_HALF) ||
-        (!is_quantize_model && const_tensor.quantized())) {
+    if (device_type == DeviceType::CPU &&
+        const_tensor.data_type() == DataType::DT_HALF) {
       dst_data_type = DataType::DT_FLOAT;
+    } else if (!is_quantize_model && const_tensor.quantized()) {
+      if (device_type == GPU && net_def.data_type() != DataType::DT_FLOAT) {
+        dst_data_type = DataType::DT_HALF;
+      } else {
+        dst_data_type = DataType::DT_FLOAT;
+      }
     }
     std::unique_ptr<Tensor> tensor(
@@ -159,17 +182,17 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
       }
     } else if (!is_quantize_model && const_tensor.quantized()) {
       // uncompress the weights of uint8
-      Tensor::MappingGuard guard(tensor.get());
-      auto quantized_data = reinterpret_cast<const uint8_t *>(
-          model_data + const_tensor.offset());
-      auto dequantized_data = tensor->mutable_data<float>();
-      QuantizeUtil<uint8_t>
-          quantize_util(&device->cpu_runtime()->thread_pool());
-      quantize_util.Dequantize(quantized_data,
-                               tensor->size(),
-                               const_tensor.scale(),
-                               const_tensor.zero_point(),
-                               dequantized_data);
+      if (dst_data_type != DT_FLOAT) {
+        DequantizeTensor<half>(device,
+                               model_data,
+                               const_tensor,
+                               tensor.get());
+      } else {
+        DequantizeTensor<float>(device,
+                                model_data,
+                                const_tensor,
+                                tensor.get());
+      }
     } else {
       tensor->CopyBytes(model_data + const_tensor.offset(),
                         const_tensor.data_size() *
...
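The dst_data_type selection above reads as: CPU half weights are still promoted to float, while quantized weights in a non-quantized model are now dequantized to half on GPU whenever the NetDef's data type is not float, and to float everywhere else. A hypothetical distillation of that rule (not a function in the patch):

    DataType ChooseDequantizedType(DeviceType device_type, const NetDef &net_def) {
      // Half output only pays off where the runtime computes in fp16 (GPU).
      if (device_type == GPU && net_def.data_type() != DataType::DT_FLOAT) {
        return DataType::DT_HALF;
      }
      return DataType::DT_FLOAT;
    }

This halves the memory held by dequantized weights on fp16 GPU runs, which is what the commit title refers to.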
@@ -72,7 +72,7 @@ class QuantizeOp<DeviceType::CPU, uint8_t> : public Operation {
  private:
   bool non_zero_;
   bool find_range_every_time_;
-  QuantizeUtil<uint8_t> quantize_util_;
+  QuantizeUtil<float, uint8_t> quantize_util_;
 };
 template<DeviceType D, class T>
@@ -103,7 +103,7 @@ class DequantizeOp<DeviceType::CPU, T> : public Operation {
   }
  private:
-  QuantizeUtil<T> quantize_util_;
+  QuantizeUtil<float, T> quantize_util_;
 };
 void RegisterQuantize(OpRegistryBase *op_registry) {
...
@@ -101,6 +101,7 @@ message NetDef {
   repeated OperatorDef op = 1;
   repeated Argument arg = 2;
   repeated ConstTensor tensors = 3;
+  optional DataType data_type = 4 [default = DT_FLOAT];
   repeated InputOutputInfo input_info = 100;
   repeated InputOutputInfo output_info = 101;
...
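Because the new field is optional with a DT_FLOAT default, models serialized before this change keep their old behavior: the generated accessor simply reports DT_FLOAT, so the workspace never takes the half path for them. A sketch of the check against protobuf's generated C++ API:

    bool RunsInHalf(const NetDef &net_def) {
      return net_def.data_type() != DataType::DT_FLOAT;  // DT_FLOAT for old models
    }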
@@ -281,6 +281,7 @@ def save_model(option, net_def, model_checksum, weight_checksum, template_dir,
         obfuscate_name(option, net_def)
     output_dir = output_dir + '/'
+    net_def.data_type = option.data_type
     # update tensor type
     update_tensor_infos(net_def, option.data_type)
...
@@ -1172,7 +1172,8 @@ void TestQuant(const index_t batch,
   auto bias_data = bias->data<float>();
   float bias_scale = q_input->scale() * q_filter->scale();
   std::vector<int32_t> q_bias(bias->size());
-  QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+  QuantizeUtil<float, int32_t>
+      quantize_util(OpTestContext::Get()->thread_pool());
   quantize_util.QuantizeWithScaleAndZeropoint(
       bias_data, bias->size(), bias_scale, 0, q_bias.data());
   net.AddInputFromArray<DeviceType::CPU, int32_t>(
...
@@ -440,7 +440,8 @@ void TestQuant(const index_t batch,
   auto bias_data = bias->data<float>();
   float bias_scale = q_input->scale() * q_filter->scale();
   std::vector<int32_t> q_bias(bias->size());
-  QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+  QuantizeUtil<float, int32_t>
+      quantize_util(OpTestContext::Get()->thread_pool());
   quantize_util.QuantizeWithScaleAndZeropoint(
       bias_data, bias->size(), bias_scale, 0, q_bias.data());
   net.AddInputFromArray<DeviceType::CPU, int32_t>(
...
@@ -267,7 +267,8 @@ void QuantRandom(const index_t batch,
   float bias_scale = q_input->scale() * q_weight->scale();
   std::vector<int32_t> q_bias(bias->size());
-  QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+  QuantizeUtil<float, int32_t>
+      quantize_util(OpTestContext::Get()->thread_pool());
   quantize_util.QuantizeWithScaleAndZeropoint(
       bias_data, bias->size(), bias_scale, 0, q_bias.data());
   net.AddInputFromArray<DeviceType::CPU, int32_t>(
...