From 23bd28c86af4f0902542356830d3fd42874bacdb Mon Sep 17 00:00:00 2001
From: Bin Li
Date: Wed, 22 May 2019 16:29:40 +0800
Subject: [PATCH] Dequantize weights to half

---
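For non-quantized models whose weights are stored as quantized uint8, the
weights were always uncompressed to float. This patch parameterizes
QuantizeUtil on the destination float type as well (QuantizeUtil<F, Q>
instead of QuantizeUtil<Q>), routes Dequantize() output through a
FloatCast<F> helper, records the model's data type in NetDef, and lets the
workspace pick half instead of float as the destination type when loading
quantized weights for a GPU model whose runtime data type is not float.

A minimal sketch of the resulting call pattern (illustrative only: the
thread pool and the buffer/size/scale names below are assumptions, not
part of this patch):

    #include <vector>
    #include "mace/core/quantize.h"

    // Expand quantized uint8 weights straight to fp16 for a half model.
    mace::QuantizeUtil<mace::half, uint8_t> util(&thread_pool);
    std::vector<mace::half> weights(weight_size);
    util.Dequantize(quantized_weights, weight_size, scale, zero_point,
                    weights.data());
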
 mace/core/quantize.cc                         | 22 +++---
 mace/core/quantize.h                          | 70 +++++++++----------
 mace/core/runtime/apu/apu_wrapper.h           |  2 +-
 .../runtime/hexagon/hexagon_hta_wrapper.h     |  2 +-
 mace/core/types.h                             | 10 +++
 mace/core/workspace.cc                        | 51 ++++++++++----
 mace/ops/arm/q8/quantize.cc                   |  4 +-
 mace/proto/mace.proto                         |  1 +
 mace/python/tools/model_saver.py              |  1 +
 test/ccunit/mace/ops/conv_2d_test.cc          |  3 +-
 test/ccunit/mace/ops/depthwise_conv2d_test.cc |  3 +-
 test/ccunit/mace/ops/fully_connected_test.cc  |  3 +-
 12 files changed, 105 insertions(+), 67 deletions(-)

diff --git a/mace/core/quantize.cc b/mace/core/quantize.cc
index ec4c65ac..b600ab31 100644
--- a/mace/core/quantize.cc
+++ b/mace/core/quantize.cc
@@ -23,7 +23,7 @@ namespace mace {
 
 #ifdef MACE_ENABLE_NEON
 template<>
-void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
+void QuantizeUtil<float, uint8_t>::QuantizeWithScaleAndZeropoint(
     const float *input,
     const index_t size,
     float scale,
@@ -65,11 +65,11 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
 }
 
 template<>
-void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
-                                       const index_t size,
-                                       const float scale,
-                                       const int32_t zero_point,
-                                       float *output) {
+void QuantizeUtil<float, uint8_t>::Dequantize(const uint8_t *input,
+                                              const index_t size,
+                                              const float scale,
+                                              const int32_t zero_point,
+                                              float *output) {
   const index_t block_count = size / 16;
   const int32x4_t vzero = vdupq_n_s32(zero_point);
   const float32x4_t vscale = vdupq_n_f32(scale);
@@ -104,11 +104,11 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
 }
 
 template<>
-void QuantizeUtil<int32_t>::Dequantize(const int *input,
-                                       const index_t size,
-                                       const float scale,
-                                       const int32_t zero_point,
-                                       float *output) {
+void QuantizeUtil<float, int32_t>::Dequantize(const int *input,
+                                              const index_t size,
+                                              const float scale,
+                                              const int32_t zero_point,
+                                              float *output) {
   const index_t block_count = size / 4;
   const int32x4_t vzero = vdupq_n_s32(zero_point);
   const float32x4_t vscale = vdupq_n_f32(scale);
diff --git a/mace/core/quantize.h b/mace/core/quantize.h
index 3e755bf0..00fb3db9 100644
--- a/mace/core/quantize.h
+++ b/mace/core/quantize.h
@@ -25,7 +25,7 @@
 
 namespace mace {
 
-template<typename T>
+template<typename Q>
 inline void AdjustRange(const float in_min_data,
                         const float in_max_data,
                         const bool non_zero,
@@ -33,8 +33,8 @@ inline void AdjustRange(const float in_min_data,
                         int32_t *zero_point) {
   // re-range to make range include zero float and
   // make zero float as integer u8
-  const T quantized_min = std::numeric_limits<T>::lowest();
-  const T quantized_max = std::numeric_limits<T>::max();
+  const Q quantized_min = std::numeric_limits<Q>::lowest();
+  const Q quantized_max = std::numeric_limits<Q>::max();
   if (quantized_min < 0) {
     MACE_ASSERT(!non_zero, "Cannot nudge to non_zero quantize value.");
   }
@@ -65,15 +65,15 @@ inline void AdjustRange(const float in_min_data,
   }
 }
 
-template<typename T>
-inline T Saturate(float value) {
+template<typename Q>
+inline Q Saturate(float value) {
   int rounded_value = static_cast<int>(value);
-  if (rounded_value <= std::numeric_limits<T>::lowest()) {
-    return std::numeric_limits<T>::lowest();
-  } else if (rounded_value >= std::numeric_limits<T>::max()) {
-    return std::numeric_limits<T>::max();
+  if (rounded_value <= std::numeric_limits<Q>::lowest()) {
+    return std::numeric_limits<Q>::lowest();
+  } else if (rounded_value >= std::numeric_limits<Q>::max()) {
+    return std::numeric_limits<Q>::max();
   } else {
-    return static_cast<T>(rounded_value);
+    return static_cast<Q>(rounded_value);
   }
 }
@@ -115,7 +115,7 @@ inline void GetOutputMultiplierAndShift(
   MACE_CHECK(*right_shift >= 0);
 }
 
-template<typename T>
+template<typename F, typename Q>
 class QuantizeUtil {
  public:
   explicit QuantizeUtil(utils::ThreadPool *thread_pool)
@@ -125,11 +125,11 @@ class QuantizeUtil {
                                      const index_t size,
                                      float scale,
                                      int32_t zero_point,
-                                     T *output) {
+                                     Q *output) {
     float recip_scale = 1 / scale;
     thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) {
       for (index_t i = start; i < end; i += step) {
-        output[i] = Saturate<T>(roundf(zero_point + recip_scale * input[i]));
+        output[i] = Saturate<Q>(roundf(zero_point + recip_scale * input[i]));
       }
     }, 0, size, 1);
   }
@@ -137,14 +137,14 @@ class QuantizeUtil {
   void Quantize(const float *input,
                 const index_t size,
                 bool non_zero,
-                T *output,
+                Q *output,
                 float *scale,
                 int32_t *zero_point) {
     float in_min_data;
     float in_max_data;
     FindMinMax(input, size, &in_min_data, &in_max_data);
 
-    AdjustRange<T>(in_min_data, in_max_data, non_zero,
+    AdjustRange<Q>(in_min_data, in_max_data, non_zero,
                    scale, zero_point);
     QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output);
@@ -158,24 +158,24 @@ class QuantizeUtil {
     Tensor::MappingGuard input_guard(&input);
     Tensor::MappingGuard output_guard(output);
     auto *input_data = input.data<float>();
-    auto *output_data = output->mutable_data<T>();
+    auto *output_data = output->mutable_data<Q>();
     float scale;
     int32_t zero_point;
 
     Quantize(input_data, input.size(), false, output_data, &scale, &zero_point);
 
-    *min_out = scale * (std::numeric_limits<T>::lowest() - zero_point);
-    *max_out = scale * (std::numeric_limits<T>::max() - zero_point);
+    *min_out = scale * (std::numeric_limits<Q>::lowest() - zero_point);
+    *max_out = scale * (std::numeric_limits<Q>::max() - zero_point);
   }
 
-  void Dequantize(const T *input,
+  void Dequantize(const Q *input,
                   const index_t size,
                   const float scale,
                   const int32_t zero_point,
-                  float *output) {
+                  F *output) {
     thread_pool_->Compute1D([=](index_t start, index_t end, index_t step) {
       for (index_t i = start; i < end; i += step) {
-        output[i] = scale * (input[i] - zero_point);
+        output[i] = FloatCast<F>(scale * (input[i] - zero_point));
      }
     }, 0, size, 1);
   }
@@ -187,12 +187,12 @@ class QuantizeUtil {
     MACE_CHECK(input.size() != 0);
     Tensor::MappingGuard input_guard(&input);
     Tensor::MappingGuard output_guard(output);
-    auto *input_data = input.data<T>();
-    auto *output_data = output->mutable_data<float>();
+    auto *input_data = input.data<Q>();
+    auto *output_data = output->mutable_data<F>();
     float scale;
     int32_t zero_point;
 
-    AdjustRange<T>(min_in, max_in, false, &scale, &zero_point);
+    AdjustRange<Q>(min_in, max_in, false, &scale, &zero_point);
     Dequantize(input_data, input.size(), scale, zero_point, output_data);
   }
@@ -204,7 +204,7 @@
 
 #ifdef MACE_ENABLE_NEON
 template<>
-void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
+void QuantizeUtil<float, uint8_t>::QuantizeWithScaleAndZeropoint(
     const float *input,
     const index_t size,
     float scale,
@@ -212,18 +212,18 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
     uint8_t *output);
 
 template<>
-void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
-                                       const index_t size,
-                                       const float scale,
-                                       const int32_t zero_point,
-                                       float *output);
+void QuantizeUtil<float, uint8_t>::Dequantize(const uint8_t *input,
+                                              const index_t size,
+                                              const float scale,
+                                              const int32_t zero_point,
+                                              float *output);
 
 template<>
-void QuantizeUtil<int32_t>::Dequantize(const int *input,
-                                       const index_t size,
-                                       const float scale,
-                                       const int32_t zero_point,
-                                       float *output);
+void QuantizeUtil<float, int32_t>::Dequantize(const int *input,
+                                              const index_t size,
+                                              const float scale,
+                                              const int32_t zero_point,
+                                              float *output);
 #endif
diff --git a/mace/core/runtime/apu/apu_wrapper.h b/mace/core/runtime/apu/apu_wrapper.h
index cb361b9d..ea0fb012 100755
--- a/mace/core/runtime/apu/apu_wrapper.h
+++ b/mace/core/runtime/apu/apu_wrapper.h
@@ -55,7 +55,7 @@ struct tensor_info {
   ApuFrontend* frontend;
   std::vector<tensor_info> input_infos;
   std::vector<tensor_info> output_infos;
-  QuantizeUtil<uint8_t> quantize_util_;
+  QuantizeUtil<float, uint8_t> quantize_util_;
 };
 
 }  // namespace mace
diff --git a/mace/core/runtime/hexagon/hexagon_hta_wrapper.h b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h
index 04e3f3e2..6b33514c 100644
--- a/mace/core/runtime/hexagon/hexagon_hta_wrapper.h
+++ b/mace/core/runtime/hexagon/hexagon_hta_wrapper.h
@@ -50,7 +50,7 @@ class HexagonHTAWrapper : public HexagonControlWrapper {
   void SetDebugLevel(int level) override;
 
  private:
-  QuantizeUtil<uint8_t> quantize_util_;
+  QuantizeUtil<float, uint8_t> quantize_util_;
   MACE_DISABLE_COPY_AND_ASSIGN(HexagonHTAWrapper);
 };
 }  // namespace mace
diff --git a/mace/core/types.h b/mace/core/types.h
index f2fbad30..aa1f9a89 100644
--- a/mace/core/types.h
+++ b/mace/core/types.h
@@ -66,6 +66,16 @@ enum FrameworkType {
   CAFFE = 1,
 };
 
+template <typename T>
+inline T FloatCast(float data) {
+  return data;
+}
+
+template <>
+inline half FloatCast(float data) {
+  return half_float::half_cast<half>(data);
+}
+
 }  // namespace mace
 
 #endif  // MACE_CORE_TYPES_H_
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index a70fe3af..fa9a5891 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -46,6 +46,24 @@ bool HasHalfTensor(const NetDef &net_def) {
   return false;
 }
 
+template <typename T>
+void DequantizeTensor(Device *device,
+                      const unsigned char *model_data,
+                      const ConstTensor &const_tensor,
+                      Tensor *output_tensor) {
+  Tensor::MappingGuard guard(output_tensor);
+  auto quantized_data = reinterpret_cast<const uint8_t *>(
+      model_data + const_tensor.offset());
+  auto dequantized_data = output_tensor->mutable_data<T>();
+  QuantizeUtil<T, uint8_t>
+      quantize_util(&device->cpu_runtime()->thread_pool());
+  quantize_util.Dequantize(quantized_data,
+                           output_tensor->size(),
+                           const_tensor.scale(),
+                           const_tensor.zero_point(),
+                           dequantized_data);
+}
+
 }  // namespace
 
 Workspace::Workspace() = default;
@@ -125,10 +143,15 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
     }
 
     DataType dst_data_type = const_tensor.data_type();
-    if ((device_type == DeviceType::CPU &&
-         const_tensor.data_type() == DataType::DT_HALF) ||
-        (!is_quantize_model && const_tensor.quantized())) {
+    if (device_type == DeviceType::CPU &&
+        const_tensor.data_type() == DataType::DT_HALF) {
       dst_data_type = DataType::DT_FLOAT;
+    } else if (!is_quantize_model && const_tensor.quantized()) {
+      if (device_type == GPU && net_def.data_type() != DataType::DT_FLOAT) {
+        dst_data_type = DataType::DT_HALF;
+      } else {
+        dst_data_type = DataType::DT_FLOAT;
+      }
     }
 
     std::unique_ptr<Tensor> tensor(
@@ -159,17 +182,17 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
       }
     } else if (!is_quantize_model && const_tensor.quantized()) {
       // uncompress the weights of uint8
-      Tensor::MappingGuard guard(tensor.get());
-      auto quantized_data = reinterpret_cast<const uint8_t *>(
-          model_data + const_tensor.offset());
-      auto dequantized_data = tensor->mutable_data<float>();
-      QuantizeUtil<uint8_t>
-          quantize_util(&device->cpu_runtime()->thread_pool());
-      quantize_util.Dequantize(quantized_data,
-                               tensor->size(),
-                               const_tensor.scale(),
-                               const_tensor.zero_point(),
-                               dequantized_data);
+      if (dst_data_type != DT_FLOAT) {
+        DequantizeTensor<half>(device,
+                               model_data,
+                               const_tensor,
+                               tensor.get());
+      } else {
+        DequantizeTensor<float>(device,
+                                model_data,
+                                const_tensor,
+                                tensor.get());
+      }
     } else {
       tensor->CopyBytes(model_data + const_tensor.offset(),
                         const_tensor.data_size() *
diff --git a/mace/ops/arm/q8/quantize.cc b/mace/ops/arm/q8/quantize.cc
index 09354a45..9c80dcbc 100644
--- a/mace/ops/arm/q8/quantize.cc
+++ b/mace/ops/arm/q8/quantize.cc
@@ -72,7 +72,7 @@ class QuantizeOp<DeviceType::CPU, uint8_t> : public Operation {
  private:
   bool non_zero_;
   bool find_range_every_time_;
-  QuantizeUtil<uint8_t> quantize_util_;
+  QuantizeUtil<float, uint8_t> quantize_util_;
 };
 
 template <typename T>
@@ -103,7 +103,7 @@ class DequantizeOp<DeviceType::CPU, T> : public Operation {
   }
 
  private:
-  QuantizeUtil<T> quantize_util_;
+  QuantizeUtil<float, T> quantize_util_;
 };
 
 void RegisterQuantize(OpRegistryBase *op_registry) {
diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto
index 369c814a..0d28a079 100644
--- a/mace/proto/mace.proto
+++ b/mace/proto/mace.proto
@@ -101,6 +101,7 @@ message NetDef {
   repeated OperatorDef op = 1;
   repeated Argument arg = 2;
   repeated ConstTensor tensors = 3;
+  optional DataType data_type = 4 [default = DT_FLOAT];
 
   repeated InputOutputInfo input_info = 100;
   repeated InputOutputInfo output_info = 101;
diff --git a/mace/python/tools/model_saver.py b/mace/python/tools/model_saver.py
index 270ac8e4..4d5fd77a 100644
--- a/mace/python/tools/model_saver.py
+++ b/mace/python/tools/model_saver.py
@@ -281,6 +281,7 @@ def save_model(option, net_def, model_checksum, weight_checksum, template_dir,
     obfuscate_name(option, net_def)
 
     output_dir = output_dir + '/'
+    net_def.data_type = option.data_type
 
     # update tensor type
     update_tensor_infos(net_def, option.data_type)
diff --git a/test/ccunit/mace/ops/conv_2d_test.cc b/test/ccunit/mace/ops/conv_2d_test.cc
index 42929057..3f97d0d2 100644
--- a/test/ccunit/mace/ops/conv_2d_test.cc
+++ b/test/ccunit/mace/ops/conv_2d_test.cc
@@ -1172,7 +1172,8 @@ void TestQuant(const index_t batch,
   auto bias_data = bias->data<float>();
   float bias_scale = q_input->scale() * q_filter->scale();
   std::vector<int32_t> q_bias(bias->size());
-  QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+  QuantizeUtil<float, int32_t>
+      quantize_util(OpTestContext::Get()->thread_pool());
   quantize_util.QuantizeWithScaleAndZeropoint(
       bias_data, bias->size(), bias_scale, 0, q_bias.data());
   net.AddInputFromArray<DeviceType::CPU, int32_t>(
diff --git a/test/ccunit/mace/ops/depthwise_conv2d_test.cc b/test/ccunit/mace/ops/depthwise_conv2d_test.cc
index d34722a5..a91d7961 100644
--- a/test/ccunit/mace/ops/depthwise_conv2d_test.cc
+++ b/test/ccunit/mace/ops/depthwise_conv2d_test.cc
@@ -440,7 +440,8 @@ void TestQuant(const index_t batch,
   auto bias_data = bias->data<float>();
   float bias_scale = q_input->scale() * q_filter->scale();
   std::vector<int32_t> q_bias(bias->size());
-  QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+  QuantizeUtil<float, int32_t>
+      quantize_util(OpTestContext::Get()->thread_pool());
   quantize_util.QuantizeWithScaleAndZeropoint(
       bias_data, bias->size(), bias_scale, 0, q_bias.data());
   net.AddInputFromArray<DeviceType::CPU, int32_t>(
diff --git a/test/ccunit/mace/ops/fully_connected_test.cc b/test/ccunit/mace/ops/fully_connected_test.cc
index 586eb166..25dcbebb 100644
--- a/test/ccunit/mace/ops/fully_connected_test.cc
+++ b/test/ccunit/mace/ops/fully_connected_test.cc
@@ -267,7 +267,8 @@ void QuantRandom(const index_t batch,
   float bias_scale = q_input->scale() * q_weight->scale();
   std::vector<int32_t> q_bias(bias->size());
 
-  QuantizeUtil<int32_t> quantize_util(OpTestContext::Get()->thread_pool());
+  QuantizeUtil<float, int32_t>
+      quantize_util(OpTestContext::Get()->thread_pool());
   quantize_util.QuantizeWithScaleAndZeropoint(
       bias_data, bias->size(), bias_scale, 0, q_bias.data());
   net.AddInputFromArray<DeviceType::CPU, int32_t>(
--
GitLab