Commit 474645fb authored by 李滨

Merge branch 'quantize' into 'master'

Add quantization docs

See merge request !845
......
@@ -28,6 +28,7 @@ The main documentation is organized into the following sections:
   user_guide/basic_usage
   user_guide/advanced_usage
   user_guide/op_lists
   user_guide/quantization_usage

.. toctree::
   :maxdepth: 1
......
......
@@ -28,7 +28,7 @@ Here we use the mobilenet-v2 model as an example.
.. note::
-    It's highly recommanded to use a release version instead of master branch.
+    It's highly recommended to use a release version instead of the master branch.
2. Pull the `MACE Model Zoo <https://github.com/XiaoMi/mace-models>`__ project.
......
Quantization
===============
MACE supports two kinds of quantization mechanisms:

* **Quantization-aware training (recommended)**

  After pre-training the model in floating point, insert simulated quantization operations into the model, then fine-tune the new model.
  Refer to `Tensorflow quantization-aware training <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/quantize>`__.

* **Post-training quantization**

  After pre-training the model in floating point, estimate the output range of each activation layer using sample inputs.
Quantization-aware training
----------------------------

It is recommended that developers fine-tune the fixed-point model, as experiments show this improves accuracy, especially for lightweight
models such as MobileNet. The only thing you need to do to run such a model with MACE is to add the following config to the model yaml file:

1. `input_ranges`: the ranges of the model's inputs, e.g., -1.0,1.0.
2. `quantize`: set `quantize` to 1.

.. note::

    You need to set `runtime` to `cpu` because this quantization method is only supported on CPU for now (DSP support will be added soon).
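Putting these together, a minimal sketch of the quantization-related part of a deployment file is shown below. Only `runtime`, `quantize`, and `input_ranges` come from this guide; the model name, tensors, shapes, and surrounding structure are illustrative placeholders that should match your own config:

.. code:: yaml

    # Hypothetical excerpt of a model deployment file. Only runtime,
    # quantize and input_ranges are prescribed by this guide; all the
    # other names and values below are placeholders.
    models:
      mobilenet_v2:
        platform: tensorflow
        runtime: cpu          # quantized inference runs on CPU for now
        quantize: 1           # enable quantized inference
        subgraphs:
          - input_tensors:
              - input
            input_shapes:
              - 1,224,224,3
            input_ranges:
              - -1.0,1.0      # range of the corresponding input tensor
            output_tensors:
              - output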
Post-training quantization
---------------------------

MACE also supports post-training quantization if you want to try quantizing a model directly without fine-tuning.
This method requires the developer to estimate the tensor range of each activation layer statistically using sample inputs.
MACE provides tools to do these statistics with the following steps:

1. Convert the original model to run on a CPU host without obfuscation (by setting `target_abis` to `host`, `runtime` to `cpu`, and `obfuscate` to `0`, and appending `:0` to `output_tensors` if it is missing in the yaml config).
E.g.,
.. code:: sh

    python tools/converter.py convert --config ../mace-models/inception-v3/inception-v3.yml
2. Log the tensor range of each activation layer by running inference over several samples.
.. code:: sh

    python tools/converter.py run --config ../mace-models/inception-v3/inception-v3.yml --example --quantize_stat --input_dir samples > range_log
3. Calculate the overall range of each activation layer by specifying a percentile cutoff.
.. code:: sh

    python mace/python/tools/quantization/quantize_stat.py --log_file range_log --percentile 5 > overall_range
4. Convert the quantized model (by setting `quantize` to `1` and `quantize_range_file` to the overall_range file path in the yaml config), as sketched below.
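E.g., the added fields might look like this (a minimal sketch; the range file path is a placeholder for wherever step 3 wrote the overall_range file):

.. code:: yaml

    # hypothetical excerpt of the model yaml config; the path below
    # is a placeholder
    quantize: 1
    quantize_range_file: /path/to/overall_range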
.. note::

    `quantize_weights` and `quantize_nodes` should not be specified when using the `TransformGraph` tool if you are using MACE quantization.
......
@@ -133,7 +133,7 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) {
VLOG(3) << "Operator " << op->debug_def().name()
<< " has shape: " << MakeString(op->Output(0)->shape());
-if (EnvEnabled("MACE_LOG_TENSOR_RANGE") && device_type == CPU) {
+if (EnvEnabled("MACE_LOG_TENSOR_RANGE")) {
for (int i = 0; i < op->OutputSize(); ++i) {
if (op->debug_def().quantize_info_size() == 0) {
int data_type = op->GetOptionalArg("T", static_cast<int>(DT_FLOAT));
......
......
@@ -305,7 +305,7 @@ bool RunModel(const std::vector<std::string> &input_names,
out_file.flush();
out_file.close();
} else {
std::cerr << "Open output file failed";
std::cerr << "Open output file failed" << std::endl;
return -1;
}
}
......
@@ -315,7 +315,8 @@ bool RunModel(const std::vector<std::string> &input_names,
closedir(dir_parent);
} else {
std::cerr << "Directory " << FLAGS_input_dir << " does not exist.";
std::cerr << "Directory " << FLAGS_input_dir << " does not exist."
<< std::endl;
}
} else {
for (size_t i = 0; i < input_count; ++i) {
......
@@ -346,7 +347,7 @@ bool RunModel(const std::vector<std::string> &input_names,
out_file.flush();
out_file.close();
} else {
std::cerr << "Open output file failed";
std::cerr << "Open output file failed" << std::endl;
return -1;
}
}
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace mace {
namespace ops {
namespace test {
namespace {
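// Measures throughput of the Quantize op on a 1-D float tensor of
// `count` elements: two warm-up runs, then `iters` timed runs.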
template <DeviceType D, typename T>
void Quantize(int iters, int count) {
mace::testing::StopTiming();
OpsTestNet net;
// Add input data
net.AddRandomInput<D, float>("Input", {count});
OpDefBuilder("Quantize", "QuantizeBM")
.Input("Input")
.Output("Output")
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Warm-up
for (int i = 0; i < 2; ++i) {
net.RunOp(D);
}
net.Sync();
mace::testing::StartTiming();
while (iters--) {
net.RunOp(D);
}
net.Sync();
}
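// Measures throughput of the Dequantize op: a `count`-element tensor of
// type T is dequantized back to float on each timed run.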
template <DeviceType D, typename T>
void Dequantize(int iters, int count) {
mace::testing::StopTiming();
OpsTestNet net;
// Add input data
net.AddRandomInput<D, T>("Input", {count});
OpDefBuilder("Dequantize", "DequantizeBM")
.Input("Input")
.Output("Output")
.OutputType({DT_FLOAT})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Warm-up
for (int i = 0; i < 2; ++i) {
net.RunOp(D);
}
net.Sync();
mace::testing::StartTiming();
while (iters--) {
net.RunOp(D);
}
net.Sync();
}
} // namespace
#define MACE_BM_QUANTIZE_MACRO(N, TYPE, DEVICE) \
static void \
MACE_BM_QUANTIZE_##N##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N; \
mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot * sizeof(TYPE)); \
Quantize<DEVICE, TYPE>(iters, N); \
} \
MACE_BENCHMARK( \
MACE_BM_QUANTIZE_##N##_##TYPE##_##DEVICE)
#define MACE_BM_QUANTIZE(N) \
MACE_BM_QUANTIZE_MACRO(N, uint8_t, CPU);
#define MACE_BM_DEQUANTIZE_MACRO(N, TYPE, DEVICE) \
static void \
MACE_BM_DEQUANTIZE_##N##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N; \
mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot * sizeof(TYPE)); \
Dequantize<DEVICE, TYPE>(iters, N); \
} \
MACE_BENCHMARK( \
MACE_BM_DEQUANTIZE_##N##_##TYPE##_##DEVICE)
#define MACE_BM_DEQUANTIZE(N) \
MACE_BM_DEQUANTIZE_MACRO(N, uint8_t, CPU);
MACE_BM_QUANTIZE(256);
MACE_BM_QUANTIZE(1470000);
MACE_BM_DEQUANTIZE(256);
MACE_BM_DEQUANTIZE(1470000);
} // namespace test
} // namespace ops
} // namespace mace
......
@@ -721,6 +721,8 @@ def tuning_run(abi,
(model_tag, running_round, restart_round, str(tuning),
str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
gpu_perf_hint, gpu_priority_hint))
sys.stdout.flush()
mace_model_path = ""
if model_graph_format == ModelFormat.file:
mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
......
@@ -880,6 +882,7 @@ def tuning_run(abi,
six.print_("Running finished!\n")
sys.stdout.flush()
return stdout
......