diff --git a/docs/index.rst b/docs/index.rst index 1441180820bb48b8c52f0f3d33e0f5e3eec71563..f839a13f7cf8d04c39d63280306ee3fb8dff513b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -28,6 +28,7 @@ The main documentation is organized into the following sections: user_guide/basic_usage user_guide/advanced_usage user_guide/op_lists + user_guide/quantization_usage .. toctree:: :maxdepth: 1 diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst index 131837ebb660e4f027fd2dd5f7ebd1c7687028c6..eb067c49a5b42fc2f33d8a614da6f067eb5452ff 100644 --- a/docs/user_guide/basic_usage.rst +++ b/docs/user_guide/basic_usage.rst @@ -28,7 +28,7 @@ Here we use the mobilenet-v2 model as an example. .. note:: - It's highly recommanded to use a release version instead of master branch. + It's highly recommended to use a release version instead of master branch. 2. Pull `MACE Model Zoo `__ project. diff --git a/docs/user_guide/quantization_usage.rst b/docs/user_guide/quantization_usage.rst new file mode 100644 index 0000000000000000000000000000000000000000..11f8bee2205537ed515c3329532a2974d3ff3621 --- /dev/null +++ b/docs/user_guide/quantization_usage.rst @@ -0,0 +1,62 @@ +Quantization +=============== + +MACE supports two kinds of quantization mechanisms, i.e., + +* **Quantization-aware training (Recommended)** + +After pre-training the model using floating point, insert simulated quantization operations into the model. Fine-tune the new model. +Refer to `Tensorflow quantization-aware training `__. + +* **Post training quantization** + +After pre-training the model using floating point, estimate the output range of each activation layer using sample inputs. + + +Quantization-aware training +---------------------------- +It is recommended that developers fine-tune the fixed-point model, as experiments show that in this way accuracy can be improved, especially for lightweight +models, e.g., MobileNet. ���
The only thing you need to make it run using MACE is to add the following config to the model yaml file: + + 1. `input_ranges`: the ranges of model's inputs, e.g., -1.0,1.0. + + 2. `quantize`: set `quantize` to be 1. + + .. note:: + You need to set `runtime` to be `cpu` because we only support this quantization method to run on CPU for now (soon DSP will be supported). + + +Post training quantization +--------------------------- +MACE supports post-training quantization if you want to quantize the model directly without fine tuning. +This method requires developers to calculate the tensor range of each activation layer statistically using sample inputs. +MACE provides tools to do statistics with the following steps: + + 1. Convert the original model to run on CPU host without obfuscation (by setting `target_abis` to `host`, `runtime` to `cpu`, and `obfuscate` to `0`, appending `:0` to `output_tensors` if missing in yaml config). + E.g., + + .. code:: sh + + python tools/converter.py convert --config ../mace-models/inception-v3/inception-v3.yml + + + 2. Log the tensor range of each activation layer by inferring several samples. + + .. code:: sh + + python tools/converter.py run --config ../mace-models/inception-v3/inception-v3.yml --example --quantize_stat --input_dir samples > range_log + + + 3. Calculate the overall range of each activation layer by specifying a percentage cutoff. + + .. code:: sh + + python mace/python/tools/quantization/quantize_stat.py --log_file range_log --percentile 5 > overall_range + + + 4. Convert the quantized model (by setting `quantize` to `1` and `quantize_range_file` to the overall_range file path in yaml config). + + +.. note:: + + `quantize_weights` and `quantize_nodes` should not be specified when using the `TransformGraph` tool if using MACE quantization. ���
diff --git a/mace/core/net.cc b/mace/core/net.cc index 2766de115cfd8d745633f0ee182044010552fde6..d71a14826a5ae8b907a56526ba79c6bce245e12d 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -133,7 +133,7 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) { VLOG(3) << "Operator " << op->debug_def().name() << " has shape: " << MakeString(op->Output(0)->shape()); - if (EnvEnabled("MACE_LOG_TENSOR_RANGE") && device_type == CPU) { + if (EnvEnabled("MACE_LOG_TENSOR_RANGE")) { for (int i = 0; i < op->OutputSize(); ++i) { if (op->debug_def().quantize_info_size() == 0) { int data_type = op->GetOptionalArg("T", static_cast(DT_FLOAT)); diff --git a/mace/examples/cli/example.cc b/mace/examples/cli/example.cc index 6679c17e3f876eff0db7604a5efded5fc3d424df..204be499ebd0c1500072da106f39800e2fca1384 100644 --- a/mace/examples/cli/example.cc +++ b/mace/examples/cli/example.cc @@ -305,7 +305,7 @@ bool RunModel(const std::vector &input_names, out_file.flush(); out_file.close(); } else { - std::cerr << "Open output file failed"; + std::cerr << "Open output file failed" << std::endl; return -1; } } @@ -315,7 +315,8 @@ bool RunModel(const std::vector &input_names, closedir(dir_parent); } else { - std::cerr << "Directory " << FLAGS_input_dir << " does not exist."; + std::cerr << "Directory " << FLAGS_input_dir << " does not exist." + << std::endl; } } else { for (size_t i = 0; i < input_count; ++i) { @@ -346,7 +347,7 @@ bool RunModel(const std::vector &input_names, out_file.flush(); out_file.close(); } else { - std::cerr << "Open output file failed"; + std::cerr << "Open output file failed" << std::endl; return -1; } } diff --git a/mace/ops/quantize_benchmark.cc b/mace/ops/quantize_benchmark.cc new file mode 100644 index 0000000000000000000000000000000000000000..62a534b721894360b922270fe03833be60ad582a --- /dev/null +++ b/mace/ops/quantize_benchmark.cc @@ -0,0 +1,117 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/core/operator.h" +#include "mace/core/testing/test_benchmark.h" +#include "mace/ops/ops_test_util.h" + +namespace mace { +namespace ops { +namespace test { + +namespace { +template +void Quantize(int iters, int count) { + mace::testing::StopTiming(); + + OpsTestNet net; + // Add input data + net.AddRandomInput("Input", {count}); + + OpDefBuilder("Quantize", "QuantizeBM") + .Input("Input") + .Output("Output") + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + + // Warm-up + for (int i = 0; i < 2; ++i) { + net.RunOp(D); + } + net.Sync(); + + mace::testing::StartTiming(); + while (iters--) { + net.RunOp(D); + } + net.Sync(); +} + +template +void Dequantize(int iters, int count) { + mace::testing::StopTiming(); + + OpsTestNet net; + // Add input data + net.AddRandomInput("Input", {count}); + + OpDefBuilder("Dequantize", "DequantizeBM") + .Input("Input") + .Output("Output") + .OutputType({DT_FLOAT}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + + // Warm-up + for (int i = 0; i < 2; ++i) { + net.RunOp(D); + } + net.Sync(); + + mace::testing::StartTiming(); + while (iters--) { + net.RunOp(D); + } + net.Sync(); +} +} // namespace + +#define MACE_BM_QUANTIZE_MACRO(N, TYPE, DEVICE) \ + static void \ + MACE_BM_QUANTIZE_##N##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N; \ + 
mace::testing::MaccProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Quantize(iters, N); \ + } \ + MACE_BENCHMARK( \ + MACE_BM_QUANTIZE_##N##_##TYPE##_##DEVICE) + +#define MACE_BM_QUANTIZE(N) \ + MACE_BM_QUANTIZE_MACRO(N, uint8_t, CPU); + +#define MACE_BM_DEQUANTIZE_MACRO(N, TYPE, DEVICE) \ + static void \ + MACE_BM_DEQUANTIZE_##N##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N; \ + mace::testing::MaccProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Dequantize(iters, N); \ + } \ + MACE_BENCHMARK( \ + MACE_BM_DEQUANTIZE_##N##_##TYPE##_##DEVICE) + +#define MACE_BM_DEQUANTIZE(N) \ + MACE_BM_DEQUANTIZE_MACRO(N, uint8_t, CPU); + +MACE_BM_QUANTIZE(256); +MACE_BM_QUANTIZE(1470000); +MACE_BM_DEQUANTIZE(256); +MACE_BM_DEQUANTIZE(1470000); + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/tools/sh_commands.py b/tools/sh_commands.py index b920d43fbe85fc85bdd23831173e31e1e1e282b1..367da8f9d551717dd508a4a0ab0d26f8ec98eeb9 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -721,6 +721,8 @@ def tuning_run(abi, (model_tag, running_round, restart_round, str(tuning), str(out_of_range_check), omp_num_threads, cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint)) + sys.stdout.flush() + mace_model_path = "" if model_graph_format == ModelFormat.file: mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) @@ -880,6 +882,7 @@ def tuning_run(abi, six.print_("Running finished!\n") + sys.stdout.flush() return stdout