From 23934855be709381bd5bd224bc344da6883b384a Mon Sep 17 00:00:00 2001
From: Bin Li
Date: Thu, 28 Nov 2019 12:26:11 +0800
Subject: [PATCH] Fix half storage

---
 docs/user_guide/advanced_usage.rst       | 10 +++++++---
 docs/user_guide/advanced_usage_cmake.rst | 10 +++++++---
 tools/python/utils/config_parser.py      |  2 +-
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst
index 091fcb98..0f9d7609 100644
--- a/docs/user_guide/advanced_usage.rst
+++ b/docs/user_guide/advanced_usage.rst
@@ -577,12 +577,16 @@ so MACE provides several ways to reduce the model size with no or little perform
 
 **1. Save model weights in half-precision floating point format**
 
-The default data type of a regular model is float (32bit). To reduce the model weights size,
+The data type of a regular model is float (32bit). To reduce the model weights size,
 half (16bit) can be used to reduce it by half with negligible accuracy degradation.
+Therefore, the default storage type for a regular model in MACE is half. However,
+if the model is very sensitive to accuracy, the storage type can be changed to float.
 
-For CPU, ``data_type`` can be specified as ``fp16_fp32`` in the deployment file to save the weights in half and actual inference in float.
+In the deployment file, ``data_type`` is ``fp16_fp32`` by default and can be changed to ``fp32_fp32``.
 
-For GPU, ``fp16_fp32`` is default. The ops in GPU take half as inputs and outputs while kernel execution in float.
+For CPU, ``fp16_fp32`` means that the weights are saved in half and the actual inference is in float.
+
+For GPU, ``fp16_fp32`` means that the ops in GPU take half as inputs and outputs while the kernel execution is in float.
 
 **2. Save model weights in quantized fixed point format**
 
diff --git a/docs/user_guide/advanced_usage_cmake.rst b/docs/user_guide/advanced_usage_cmake.rst
index 87d17fe4..7be5e2f2 100644
--- a/docs/user_guide/advanced_usage_cmake.rst
+++ b/docs/user_guide/advanced_usage_cmake.rst
@@ -406,12 +406,16 @@ so MACE provides several ways to reduce the model size with no or little perform
 
 **1. Save model weights in half-precision floating point format**
 
-The default data type of a regular model is float (32bit). To reduce the model weights size,
+The data type of a regular model is float (32bit). To reduce the model weights size,
 half (16bit) can be used to reduce it by half with negligible accuracy degradation.
+Therefore, the default storage type for a regular model in MACE is half. However,
+if the model is very sensitive to accuracy, the storage type can be changed to float.
 
-For CPU, ``data_type`` can be specified as ``fp16_fp32`` in the deployment file to save the weights in half and actual inference in float.
+In the deployment file, ``data_type`` is ``fp16_fp32`` by default and can be changed to ``fp32_fp32``.
 
-For GPU, ``fp16_fp32`` is default. The ops in GPU take half as inputs and outputs while kernel execution in float.
+For CPU, ``fp16_fp32`` means that the weights are saved in half and the actual inference is in float.
+
+For GPU, ``fp16_fp32`` means that the ops in GPU take half as inputs and outputs while the kernel execution is in float.
 
 **2. Save model weights in quantized fixed point format**
 
diff --git a/tools/python/utils/config_parser.py b/tools/python/utils/config_parser.py
index 9e5c9f6d..5a56fd3c 100644
--- a/tools/python/utils/config_parser.py
+++ b/tools/python/utils/config_parser.py
@@ -204,7 +204,7 @@ def normalize_model_config(conf):
     conf[ModelKeys.platform] = parse_platform(conf[ModelKeys.platform])
     conf[ModelKeys.runtime] = parse_device_type(conf[ModelKeys.runtime])
 
-    if ModelKeys.quantize in conf:
+    if ModelKeys.quantize in conf and conf[ModelKeys.quantize] == 1:
         conf[ModelKeys.data_type] = mace_pb2.DT_FLOAT
     else:
         if ModelKeys.data_type in conf:
--
GitLab
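
Note on the config_parser.py hunk above: after this patch, only an explicit ``quantize: 1`` in the deployment file forces float weights; a missing or zero-valued flag falls through to the regular ``data_type`` handling, which defaults to half storage as described in the updated docs. The minimal Python sketch below only illustrates that decision logic; the function name, plain dict keys, and returned strings are assumptions made for the sketch, not MACE's actual API (see normalize_model_config for the real code).

    # Illustrative sketch of the post-patch data-type selection, not MACE code.
    DEFAULT_DATA_TYPE = "fp16_fp32"  # half weights in storage, float compute (documented default)

    def resolve_data_type(model_conf):
        # Only an explicit quantize == 1 forces float weights now;
        # previously the mere presence of the quantize key was enough.
        if model_conf.get("quantize", 0) == 1:
            return "fp32"  # stands in for mace_pb2.DT_FLOAT
        # Non-quantized models honor the deployment file's data_type,
        # falling back to half-precision storage.
        return model_conf.get("data_type", DEFAULT_DATA_TYPE)

    if __name__ == "__main__":
        print(resolve_data_type({"quantize": 1}))             # fp32
        print(resolve_data_type({"quantize": 0}))             # fp16_fp32
        print(resolve_data_type({"data_type": "fp32_fp32"}))  # fp32_fp32

For an accuracy-sensitive, non-quantized model, the corresponding deployment-file change is simply setting ``data_type: fp32_fp32`` instead of relying on the ``fp16_fp32`` default.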