提交 7023a86c 编写于 作者: M Michał Gallus 提交者: Tao Luo

Fix Pooling output scale (#18186)

* Int8: Fix Pooling output scale

test=develop

* Update scales quantization for certain operators

These include: concat, transpose, pool and reshape. test=develop

* Move concat minimum scale finding to quantizer

test=develop
上级 4bc2987d
...@@ -77,7 +77,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name, ...@@ -77,7 +77,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
VarQuantScale* scales, bool are_unsigned, VarQuantScale* scales, bool are_unsigned,
std::string scale_attr_name) const { std::string scale_attr_name) const {
auto inputs = op->inputs; auto inputs = op->inputs;
auto output = op->outputs[0];
PADDLE_ENFORCE_GE(inputs.size(), 1); PADDLE_ENFORCE_GE(inputs.size(), 1);
PADDLE_ENFORCE_EQ(op->outputs.size(), 1);
// create a quantize op desc prototype // create a quantize op desc prototype
OpDesc q_desc; OpDesc q_desc;
...@@ -86,13 +88,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name, ...@@ -86,13 +88,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
std::vector<Node*> quantize_out_nodes(inputs.size()); std::vector<Node*> quantize_out_nodes(inputs.size());
std::vector<std::string> quantize_out_node_names(inputs.size()); std::vector<std::string> quantize_out_node_names(inputs.size());
double scale_min = std::numeric_limits<double>::max(); double scale_out = (*scales)[output->Name()].second.data<double>()[0];
for (const auto& input : inputs) {
double scale = (*scales)[input->Name()].second.data<double>()[0];
if (scale < scale_min) scale_min = scale;
}
unsigned max = are_unsigned ? U8_MAX : S8_MAX; unsigned max = are_unsigned ? U8_MAX : S8_MAX;
float scale = scale_min * max; float scale = scale_out * max;
for (size_t i = 0; i < inputs.size(); i++) { for (size_t i = 0; i < inputs.size(); i++) {
// Create quantize output variable // Create quantize output variable
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/inference/api/mkldnn_quantizer.h" #include "paddle/fluid/inference/api/mkldnn_quantizer.h"
#include <algorithm> #include <algorithm>
#include <limits>
#include <map> #include <map>
#include <numeric> #include <numeric>
#include <unordered_map> #include <unordered_map>
...@@ -37,6 +38,7 @@ using framework::ir::Graph; ...@@ -37,6 +38,7 @@ using framework::ir::Graph;
using ConstEigenVectorArrayMap = using ConstEigenVectorArrayMap =
Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>; Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
using string::PrettyLogH1; using string::PrettyLogH1;
static LoDTensor CreateScaleTensor(int64_t channels_num = 1);
bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
PrettyLogH1("--- Calculating scales for quantization"); PrettyLogH1("--- Calculating scales for quantization");
...@@ -52,7 +54,7 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { ...@@ -52,7 +54,7 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
for (auto const& conn : connections) { for (auto const& conn : connections) {
for (const auto& var_name : conn.second) { for (const auto& var_name : conn.second) {
// skip if scale already computed // skip if scale already computed
if (scales_.find(var_name) != scales_.end()) return; if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name); auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name); PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
...@@ -62,29 +64,49 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { ...@@ -62,29 +64,49 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
// force unsigned type if already know it // force unsigned type if already know it
bool is_unsigned = false; bool is_unsigned = false;
if (is_output && op->Type() == "conv2d") { bool compute_scale = true;
if (is_output) {
if (op->Type() == "conv2d") {
// output of conv2d with relu must be unsigned // output of conv2d with relu must be unsigned
is_unsigned = (op->HasAttr("fuse_relu") && is_unsigned = (op->HasAttr("fuse_relu") &&
boost::get<bool>(op->GetAttr("fuse_relu"))) || boost::get<bool>(op->GetAttr("fuse_relu"))) ||
(op->HasAttr("fuse_brelu") && (op->HasAttr("fuse_brelu") &&
boost::get<bool>(op->GetAttr("fuse_brelu"))); boost::get<bool>(op->GetAttr("fuse_brelu")));
} else if (is_output && op->Type() == "relu") { } else if (op->Type() == "relu") {
is_unsigned = true; is_unsigned = true;
} else if (is_output && } else if (op->Type() == "transpose2" ||
(op->Type() == "pool2d" || op->Type() == "transpose2" || op->Type() == "reshape2" || op->Type() == "pool2d") {
op->Type() == "reshape2" || op->Type() == "concat")) { auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
if (scales_.find(input_var_name) != scales_.end()) {
scales_[var_name] = scales_[input_var_name];
}
compute_scale = false;
} else if (op->Type() == "concat") {
// output of ops with unsigned input must be unsigned // output of ops with unsigned input must be unsigned
is_unsigned = true; is_unsigned = true;
double min_scale = std::numeric_limits<double>::max();
for (auto input_var_name : op->Input("X")) { for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(), PADDLE_ENFORCE(
scales_.find(input_var_name) != scales_.end(),
"Input scales must be calculated before the " "Input scales must be calculated before the "
"output scales to infer if output is unsigned."); "output scales to infer if output is unsigned.");
is_unsigned = is_unsigned && scales_[input_var_name].first; is_unsigned = is_unsigned && scales_[input_var_name].first;
min_scale = std::min(
min_scale,
scales_[input_var_name].second.data<double>()[0]);
} }
auto scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = min_scale;
scales_[var_name] = {is_unsigned, scale_tensor};
compute_scale = false;
} }
}
CalculateSingleScale(op->Type(), conn.first, var_name, *var_tensor, if (compute_scale)
is_unsigned); CalculateSingleScale(op->Type(), conn.first, var_name,
*var_tensor, is_unsigned);
} }
} }
}; };
...@@ -127,6 +149,13 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale( ...@@ -127,6 +149,13 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
} }
} }
// Builds a one-dimensional LoDTensor of `channels_num` elements that is
// prepared to hold scale values of type double on CPUPlace.
//
// The element values are not written here: callers in this file store the
// scale(s) into the returned tensor afterwards.
static LoDTensor CreateScaleTensor(int64_t channels_num) {
  LoDTensor result;
  result.Resize({channels_num});
  // Called for its effect on `result` only; the returned pointer is not
  // needed at this point.
  result.mutable_data<double>(CPUPlace());
  return result;
}
std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins( std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins(
std::vector<int> quantized_bins, std::vector<int> reference_bins) const { std::vector<int> quantized_bins, std::vector<int> reference_bins) const {
std::vector<int> expanded_quantized_bins(reference_bins.size(), 0); std::vector<int> expanded_quantized_bins(reference_bins.size(), 0);
...@@ -263,11 +292,8 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor( ...@@ -263,11 +292,8 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
min_kl_index = starting_iter; min_kl_index = starting_iter;
} }
LoDTensor scale_tensor; LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.Resize({1}); scale_tensor.data<double>()[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
scale_ptr[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
return std::make_pair(is_unsigned, scale_tensor); return std::make_pair(is_unsigned, scale_tensor);
} }
...@@ -285,10 +311,8 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor( ...@@ -285,10 +311,8 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val); min_val);
LoDTensor scale_tensor; LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.Resize({1}); scale_tensor.data<double>()[0] = 1.0 / max_abs;
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
scale_ptr[0] = 1.0 / max_abs;
return std::make_pair(is_unsigned, scale_tensor); return std::make_pair(is_unsigned, scale_tensor);
} }
...@@ -308,8 +332,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor( ...@@ -308,8 +332,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
min_val); min_val);
int channels = var_tensor.dims()[0]; int channels = var_tensor.dims()[0];
LoDTensor scale_tensor; LoDTensor scale_tensor = CreateScaleTensor(channels);
scale_tensor.Resize({channels});
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace()); auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
for (int i = 0; i < channels; ++i) { for (int i = 0; i < channels; ++i) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册