提交 7023a86c 编写于 作者: M Michał Gallus 提交者: Tao Luo

Fix Pooling output scale (#18186)

* Int8: Fix Pooling output scale

test=develop

* Update scales quantization for certain operators

These include: concat, transpose, pool and reshape. test=develop

* Move concat minimum scale finding to quantizer

test=develop
上级 4bc2987d
......@@ -77,7 +77,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
VarQuantScale* scales, bool are_unsigned,
std::string scale_attr_name) const {
auto inputs = op->inputs;
auto output = op->outputs[0];
PADDLE_ENFORCE_GE(inputs.size(), 1);
PADDLE_ENFORCE_EQ(op->outputs.size(), 1);
// create a quantize op desc prototype
OpDesc q_desc;
......@@ -86,13 +88,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
std::vector<Node*> quantize_out_nodes(inputs.size());
std::vector<std::string> quantize_out_node_names(inputs.size());
double scale_min = std::numeric_limits<double>::max();
for (const auto& input : inputs) {
double scale = (*scales)[input->Name()].second.data<double>()[0];
if (scale < scale_min) scale_min = scale;
}
double scale_out = (*scales)[output->Name()].second.data<double>()[0];
unsigned max = are_unsigned ? U8_MAX : S8_MAX;
float scale = scale_min * max;
float scale = scale_out * max;
for (size_t i = 0; i < inputs.size(); i++) {
// Create quantize output variable
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/inference/api/mkldnn_quantizer.h"
#include <algorithm>
#include <limits>
#include <map>
#include <numeric>
#include <unordered_map>
......@@ -37,6 +38,7 @@ using framework::ir::Graph;
using ConstEigenVectorArrayMap =
Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
using string::PrettyLogH1;
static LoDTensor CreateScaleTensor(int64_t channels_num = 1);
bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
PrettyLogH1("--- Calculating scales for quantization");
......@@ -52,7 +54,7 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
for (auto const& conn : connections) {
for (const auto& var_name : conn.second) {
// skip if scale already computed
if (scales_.find(var_name) != scales_.end()) return;
if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
......@@ -62,29 +64,49 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
// force unsigned type if already know it
bool is_unsigned = false;
if (is_output && op->Type() == "conv2d") {
// output of conv2d with relu must be unsigned
is_unsigned = (op->HasAttr("fuse_relu") &&
boost::get<bool>(op->GetAttr("fuse_relu"))) ||
(op->HasAttr("fuse_brelu") &&
boost::get<bool>(op->GetAttr("fuse_brelu")));
} else if (is_output && op->Type() == "relu") {
is_unsigned = true;
} else if (is_output &&
(op->Type() == "pool2d" || op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "concat")) {
// output of ops with unsigned input must be unsigned
is_unsigned = true;
for (auto input_var_name : op->Input("X")) {
bool compute_scale = true;
if (is_output) {
if (op->Type() == "conv2d") {
// output of conv2d with relu must be unsigned
is_unsigned = (op->HasAttr("fuse_relu") &&
boost::get<bool>(op->GetAttr("fuse_relu"))) ||
(op->HasAttr("fuse_brelu") &&
boost::get<bool>(op->GetAttr("fuse_brelu")));
} else if (op->Type() == "relu") {
is_unsigned = true;
} else if (op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "pool2d") {
auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
is_unsigned = is_unsigned && scales_[input_var_name].first;
if (scales_.find(input_var_name) != scales_.end()) {
scales_[var_name] = scales_[input_var_name];
}
compute_scale = false;
} else if (op->Type() == "concat") {
// output of ops with unsigned input must be unsigned
is_unsigned = true;
double min_scale = std::numeric_limits<double>::max();
for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE(
scales_.find(input_var_name) != scales_.end(),
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
is_unsigned = is_unsigned && scales_[input_var_name].first;
min_scale = std::min(
min_scale,
scales_[input_var_name].second.data<double>()[0]);
}
auto scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = min_scale;
scales_[var_name] = {is_unsigned, scale_tensor};
compute_scale = false;
}
}
CalculateSingleScale(op->Type(), conn.first, var_name, *var_tensor,
is_unsigned);
if (compute_scale)
CalculateSingleScale(op->Type(), conn.first, var_name,
*var_tensor, is_unsigned);
}
}
};
......@@ -127,6 +149,13 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
}
}
// Builds a one-dimensional LoDTensor with `channels_num` elements and
// requests double-typed storage for it on CPUPlace. No element values are
// written here; callers are expected to fill the scale values afterwards.
static LoDTensor CreateScaleTensor(int64_t channels_num) {
  LoDTensor scales;
  scales.Resize({channels_num});
  // Request the double buffer up front so callers can write through data<double>().
  scales.mutable_data<double>(CPUPlace());
  return scales;
}
std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins(
std::vector<int> quantized_bins, std::vector<int> reference_bins) const {
std::vector<int> expanded_quantized_bins(reference_bins.size(), 0);
......@@ -263,11 +292,8 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
min_kl_index = starting_iter;
}
LoDTensor scale_tensor;
scale_tensor.Resize({1});
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
scale_ptr[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
return std::make_pair(is_unsigned, scale_tensor);
}
......@@ -285,10 +311,8 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);
LoDTensor scale_tensor;
scale_tensor.Resize({1});
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
scale_ptr[0] = 1.0 / max_abs;
LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs;
return std::make_pair(is_unsigned, scale_tensor);
}
......@@ -308,8 +332,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
min_val);
int channels = var_tensor.dims()[0];
LoDTensor scale_tensor;
scale_tensor.Resize({channels});
LoDTensor scale_tensor = CreateScaleTensor(channels);
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
for (int i = 0; i < channels; ++i) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册