Commit 806832e0 authored by Zhen Wang

update the input format of channel wise dequantize op.

Parent 89dee160
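In short, this change replaces the op's fixed `ActivationScale`/`WeightScales` inputs and the `weight_bits`/`activation_bits` attributes with a single duplicable `Scales` input plus a `quant_bits` list attribute. A rough before/after sketch of the feed used by the OpTest cases below (a minimal illustration only; the variable names are placeholders, not Paddle API):

```python
import numpy as np

yq = np.random.randn(4, 3, 64, 64).astype("float32")   # quantized input (placeholder)
weight_scales = np.random.rand(4).astype("float32")    # one scale per channel of X
act_scale = np.array([1.0], dtype="float32")           # single per-layer scale

# Old interface: two fixed scale inputs and two scalar bit-width attributes.
old_inputs = {'X': yq, 'ActivationScale': act_scale, 'WeightScales': weight_scales}
old_attrs = {'weight_bits': 8, 'activation_bits': 2}

# New interface: one duplicable 'Scales' input holding at most two tensors, and a
# 'quant_bits' list whose size matches the number of scale tensors.
new_inputs = {'X': yq, 'Scales': [('scales0', weight_scales), ('scales1', act_scale)]}
new_attrs = {'quant_bits': [8, 2]}
```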
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/fake_dequantize_op.h"
#include <string>
#include <vector>
namespace paddle {
namespace operators {
@@ -84,8 +85,8 @@ class FakeChannelWiseDequantizeMaxAbsOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(
ctx->HasInput("X"),
"Input(X) of FakeChannelWiseDequantizeMaxAbsOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("WeightScales"),
"Input(WeightScales) of FakeChannelWiseDequantizeMaxAbsOp "
PADDLE_ENFORCE(ctx->HasInputs("Scales"),
"Input(Scales) of FakeChannelWiseDequantizeMaxAbsOp "
"should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("Out"),
@@ -103,39 +104,32 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker
AddInput("X",
"(Tensor) The input with float-32/64 type is the "
"low precision tensor.");
AddInput("ActivationScale",
"(float) The activation scale in quantization stage.")
.AsDispensable();
AddInput("WeightScales",
"(float array) The weight scales in quantization stage.");
AddInput("Scales",
"(Tensors) The scales in quantization stage. "
"Now, `Scales` is a vector with at most two tensors. "
"If Scales has two elements, the second tensor should only have "
"one value.")
.AsDuplicable();
AddOutput("Out",
"(Tensor) The output is the dequantized high "
"precision tensor.");
AddAttr<int>("activation_bits", "Quantization bit number for activation.")
.SetDefault(8)
.AddCustomChecker([](const int& bit_length) {
PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
"'activation_bits' should be between 1 and 16.");
});
AddAttr<int>("weight_bits", "Quantization bit number for weights.")
.SetDefault(8)
.AddCustomChecker([](const int& bit_length) {
PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
"'weight_bits' should be between 1 and 16.");
});
AddAttr<std::vector<int>>(
"quant_bits",
"Quantization bit numbers in quantization stage. "
"The size of `quant_bits` should be equal to the size of `Scales`.")
.SetDefault({8});
AddComment(R"DOC(
FakeChannelWiseDequantizeMaxAbsOp operator.
This calculation is an opposite operation of FakeChannelWiseQuantizeMaxAbsOp:
$$Out_c = \frac{ActivationScale*WeightScale_c*X_c}{(2^{weight\_bits-1}-1)*(2^{activation\_bits-1}-1)}$$
$$Out_c = \frac{X_c\prod_{i=1}^{n}Scales_{ic}}{\prod_{i=1}^{n}(2^{quant\_bits_i-1}-1)}$$
In the above formula, the range value of c is as follow:
$$0 \leq c \lt \ the\ channel\ number\ of\ X$$
In the above formula, the range value of $c$ can be represented as $0 \leq c \lt \ the\ channel\ number\ of\ X$.
Besides, the size of $quant\_bits$ should be equal to the size of $Scales$, and it is called $n$ in the formula.
Notes: Tha per-channel quantization is only applied to weights(channel size scale).
And the activations use per-layer quantization(only one scale).
Notes: In general, the per-channel quantization is only applied to weights and the activations use per-layer quantization.
)DOC");
}
};
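Read concretely with the bit widths used by the tests further down (`quant_bits = [8, 2]`, a per-channel first scale tensor and a single-value second one), the denominator of the formula is $(2^{8-1}-1)(2^{2-1}-1) = 127 \times 1 = 127$, so each output channel reduces to

$$Out_c = \frac{X_c \cdot Scales_{1c} \cdot Scales_{2}}{127}$$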
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
@@ -50,47 +51,40 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& ctx) const {
auto* in = ctx.Input<framework::Tensor>("X");
auto* weight_scales = ctx.Input<framework::Tensor>("WeightScales");
auto scales = ctx.MultiInput<framework::Tensor>("Scales");
auto* out = ctx.Output<framework::Tensor>("Out");
PADDLE_ENFORCE_EQ(weight_scales->numel(), in->dims()[0],
"The weight uses the per-channel quantization type, so "
"the number of weight scale values must be the same with "
PADDLE_ENFORCE_EQ(scales[0]->numel(), in->dims()[0],
"The number of first scale values must be the same with "
"first dimension value of Input(X).");
int ativation_bits = ctx.Attr<int>("activation_bits");
int weight_bits = ctx.Attr<int>("weight_bits");
int range = std::pow(2, weight_bits - 1) - 1;
auto quant_bits = ctx.Attr<std::vector<int>>("quant_bits");
int max_range = std::pow(2, quant_bits[0] - 1) - 1;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
out->mutable_data<T>(dev_ctx.GetPlace());
auto dequant = DequantizeFunctor<DeviceContext, T>();
if (ctx.HasInput("ActivationScale")) {
auto* activation_scale = ctx.Input<framework::Tensor>("ActivationScale");
PADDLE_ENFORCE_EQ(activation_scale->numel(), 1,
"The activation uses per-layer quantization type, so "
"it must have only one value.");
framework::Tensor cpu_weigth_scales;
framework::TensorCopy(*weight_scales, platform::CPUPlace(),
&cpu_weigth_scales);
dev_ctx.Wait();
const T* weight_scales_data = cpu_weigth_scales.data<T>();
range *= (std::pow(2, ativation_bits - 1) - 1);
if (scales.size() == 2) {
PADDLE_ENFORCE_EQ(
scales[1]->numel(), 1,
"The second scale tensor should only have one value at now.");
for (int64_t i = 0; i < in->dims()[0]; i++) {
framework::Tensor one_channel_in = in->Slice(i, i + 1);
framework::Tensor one_channel_out = out->Slice(i, i + 1);
auto max_range = range / weight_scales_data[i];
dequant(dev_ctx, &one_channel_in, activation_scale,
framework::Tensor one_channel_scale = scales[0]->Slice(i, i + 1);
max_range *= (std::pow(2, quant_bits[1] - 1) - 1);
dequant(dev_ctx, &one_channel_in, &one_channel_scale,
static_cast<T>(max_range), &one_channel_out);
}
dequant(dev_ctx, out, scales[1], static_cast<T>(1), out);
} else {
for (int64_t i = 0; i < in->dims()[0]; i++) {
framework::Tensor one_channel_in = in->Slice(i, i + 1);
framework::Tensor one_channel_out = out->Slice(i, i + 1);
framework::Tensor one_channel_scale = weight_scales->Slice(i, i + 1);
framework::Tensor one_channel_scale = scales[0]->Slice(i, i + 1);
dequant(dev_ctx, &one_channel_in, &one_channel_scale,
static_cast<T>(range), &one_channel_out);
static_cast<T>(max_range), &one_channel_out);
}
}
}
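For reference, a minimal NumPy sketch of the computation this kernel performs for the one-scale and two-scale cases. It follows the documented formula rather than the exact C++ control flow, assumes `DequantizeFunctor` evaluates `out = in * scale / max_range` (inferred from the op comment above), and uses illustrative names throughout:

```python
import numpy as np

def channel_wise_dequantize(x, scales, quant_bits):
    """x: quantized array with the channel axis first; scales: one or two arrays."""
    # Combined range: prod_i (2^(quant_bits[i]-1) - 1), matching the op comment.
    max_range = float(2 ** (quant_bits[0] - 1) - 1)
    if len(scales) == 2:
        max_range *= 2 ** (quant_bits[1] - 1) - 1
    out = np.empty_like(x, dtype=np.float64)
    for c in range(x.shape[0]):
        # Per-channel pass with the first (per-channel) scale tensor.
        out[c] = x[c] * scales[0][c] / max_range
    if len(scales) == 2:
        # Second pass over the whole tensor with the single per-layer scale.
        out *= scales[1][0]
    return out

# Example wiring that mirrors the two-scales test case below.
x = np.random.randint(-127, 128, size=(4, 3, 8, 8)).astype(np.float32)
per_channel = np.random.rand(4).astype(np.float32)
per_layer = np.array([1.0], dtype=np.float32)
y = channel_wise_dequantize(x, [per_channel, per_layer], [8, 2])
```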
@@ -49,53 +49,50 @@ def channel_wise_dequantize_max_abs(x, scales, max_range):
return y
class TestFakeChannelWiseDequantizeMaxAbsOp(OpTest):
class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest):
def set_args(self):
self.weight_bits = 8
self.activation_bits = 2
self.quant_bits = [8, 2]
self.data_type = "float32"
def setUp(self):
self.set_args()
self.op_type = "fake_channel_wise_dequantize_max_abs"
x = np.random.randn(4, 3, 64, 64).astype(self.data_type)
max_range = math.pow(2, self.weight_bits - 1) - 1
max_range = math.pow(2, self.quant_bits[0] - 1) - 1
max_range *= (math.pow(2, self.quant_bits[1] - 1) - 1)
yq, scales = channel_wise_quantize_max_abs(x, max_range)
ydq = channel_wise_dequantize_max_abs(yq, scales, max_range)
self.inputs = {
'X': yq,
'ActivationScale': np.array(1.0).astype(self.data_type),
'WeightScales': np.array(scales).astype(self.data_type)
}
self.attrs = {
'weight_bits': self.weight_bits,
'activation_bits': self.activation_bits
'Scales': [("scales0", np.array(scales).astype(self.data_type)),
("scales1", np.array([1.0]).astype(self.data_type))]
}
self.attrs = {'quant_bits': self.quant_bits}
self.outputs = {'Out': ydq}
def test_check_output(self):
self.check_output()
class TestFakeChannelWiseDequantizeMaxAbsOpNoActivationScale(OpTest):
class TestFakeChannelWiseDequantizeMaxAbsOpOneScale(OpTest):
def set_args(self):
self.weight_bits = 8
self.quant_bits = [8]
self.data_type = "float32"
def setUp(self):
self.set_args()
self.op_type = "fake_channel_wise_dequantize_max_abs"
x = np.random.randn(4, 3, 64, 64).astype(self.data_type)
max_range = math.pow(2, self.weight_bits - 1) - 1
max_range = math.pow(2, self.quant_bits[0] - 1) - 1
yq, scales = channel_wise_quantize_max_abs(x, max_range)
ydq = channel_wise_dequantize_max_abs(yq, scales, max_range)
self.inputs = {
'X': yq,
'WeightScales': np.array(scales).astype(self.data_type)
'Scales': [("scales0", np.array(scales).astype(self.data_type))]
}
self.attrs = {'weight_bits': self.weight_bits}
self.attrs = {'quant_bits': self.quant_bits}
self.outputs = {'Out': ydq}
def test_check_output(self):