Commit 806832e0 authored by Zhen Wang

update the input format of channel wise dequantize op.

Parent 89dee160
paddle/fluid/operators/fake_dequantize_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/fake_dequantize_op.h"
 #include <string>
+#include <vector>
 
 namespace paddle {
 namespace operators {
@@ -84,8 +85,8 @@ class FakeChannelWiseDequantizeMaxAbsOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(
         ctx->HasInput("X"),
         "Input(X) of FakeChannelWiseDequantizeMaxAbsOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("WeightScales"),
-                   "Input(WeightScales) of FakeChannelWiseDequantizeMaxAbsOp "
+    PADDLE_ENFORCE(ctx->HasInputs("Scales"),
+                   "Input(Scales) of FakeChannelWiseDequantizeMaxAbsOp "
                    "should not be null.");
     PADDLE_ENFORCE(
         ctx->HasOutput("Out"),
@@ -103,39 +104,32 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker
     AddInput("X",
              "(Tensor) The input with float-32/64 type is the "
              "low precision tensor.");
-    AddInput("ActivationScale",
-             "(float) The activation scale in quantization stage.")
-        .AsDispensable();
-    AddInput("WeightScales",
-             "(float array) The weight scales in quantization stage.");
+    AddInput("Scales",
+             "(Tensors) The scales in quantization stage. "
+             "Now, `Scales` is a vector with at most two tensors. "
+             "If Scales has two elements, the second tensor should only have "
+             "one value.")
+        .AsDuplicable();
     AddOutput("Out",
               "(Tensor) The output is the dequantized high "
               "precision tensor.");
-    AddAttr<int>("activation_bits", "Quantization bit number for activation.")
-        .SetDefault(8)
-        .AddCustomChecker([](const int& bit_length) {
-          PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
-                         "'activation_bits' should be between 1 and 16.");
-        });
-    AddAttr<int>("weight_bits", "Quantization bit number for weights.")
-        .SetDefault(8)
-        .AddCustomChecker([](const int& bit_length) {
-          PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
-                         "'weight_bits' should be between 1 and 16.");
-        });
+    AddAttr<std::vector<int>>(
+        "quant_bits",
+        "Quantization bit numbers in quantization stage. "
+        "The size of `quant_bits` should be equal to the size of `Scales`.")
+        .SetDefault({8});
 
     AddComment(R"DOC(
 FakeChannelWiseDequantizeMaxAbsOp operator.
 
 This calculation is an opposite operation of FakeChannelWiseQuantizeMaxAbsOp:
 
-$$Out_c = \frac{ActivationScale*WeightScale_c*X_c}{(2^{weight\_bits-1}-1)*(2^{activation\_bits-1}-1)}$$
+$$Out_c = \frac{X_c\prod_{i=1}^{n}Scales_{ic}}{\prod_{i=1}^{n}(2^{quant\_bits_i-1}-1)}$$
 
-In the above formula, the range value of c is as follow:
-
-$$0 \leq c \lt \ the\ channel\ number\ of\ X$$
+In the above formula, the range value of $c$ can be represented as
+$0 \leq c \lt \ the\ channel\ number\ of\ X$.
+Besides, the size of $quant\_bits$ should be equal to the size of $Scales$,
+and it is called $n$ in the formula.
 
-Notes: Tha per-channel quantization is only applied to weights(channel size scale).
-And the activations use per-layer quantization(only one scale).
+Notes: In general, the per-channel quantization is only applied to weights
+and the activations use per-layer quantization.
 
 )DOC");
   }
 };
......
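To make the semantics of the new `Scales` input and `quant_bits` attribute concrete, here is a minimal NumPy sketch of the DOC formula above. The helper name `channel_wise_dequantize` and the example shapes are illustrative assumptions, not part of this patch:

```python
import numpy as np

def channel_wise_dequantize(x, scales, quant_bits):
    """Sketch of the DOC formula:
    Out_c = X_c * prod_i(Scales_i[c]) / prod_i(2^(quant_bits_i - 1) - 1).
    scales[0] holds one value per channel (axis 0 of x); the optional
    scales[1] holds a single per-layer value.
    """
    out = x.astype(np.float64)
    for s, bits in zip(scales, quant_bits):
        max_range = (1 << (bits - 1)) - 1
        # Broadcast the per-channel (or scalar) scale over remaining dims.
        out = out * s.reshape([-1] + [1] * (x.ndim - 1)) / max_range
    return out

x = np.random.randint(-127, 128, size=(4, 3, 8, 8))
scales = [np.abs(np.random.randn(4)), np.array([1.0])]  # per-channel, per-layer
print(channel_wise_dequantize(x, scales, quant_bits=[8, 2]).shape)  # (4, 3, 8, 8)
```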
paddle/fluid/operators/fake_dequantize_op.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -50,47 +51,40 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
  public:
   virtual void Compute(const framework::ExecutionContext& ctx) const {
     auto* in = ctx.Input<framework::Tensor>("X");
-    auto* weight_scales = ctx.Input<framework::Tensor>("WeightScales");
+    auto scales = ctx.MultiInput<framework::Tensor>("Scales");
     auto* out = ctx.Output<framework::Tensor>("Out");
 
-    PADDLE_ENFORCE_EQ(weight_scales->numel(), in->dims()[0],
-                      "The weight uses the per-channel quantization type, so "
-                      "the number of weight scale values must be the same with "
+    PADDLE_ENFORCE_EQ(scales[0]->numel(), in->dims()[0],
+                      "The number of first scale values must be the same with "
                       "first dimension value of Input(X).");
 
-    int ativation_bits = ctx.Attr<int>("activation_bits");
-    int weight_bits = ctx.Attr<int>("weight_bits");
-    int range = std::pow(2, weight_bits - 1) - 1;
+    auto quant_bits = ctx.Attr<std::vector<int>>("quant_bits");
+    int max_range = std::pow(2, quant_bits[0] - 1) - 1;
 
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     out->mutable_data<T>(dev_ctx.GetPlace());
 
     auto dequant = DequantizeFunctor<DeviceContext, T>();
-    if (ctx.HasInput("ActivationScale")) {
-      auto* activation_scale = ctx.Input<framework::Tensor>("ActivationScale");
-      PADDLE_ENFORCE_EQ(activation_scale->numel(), 1,
-                        "The activation uses per-layer quantization type, so "
-                        "it must have only one value.");
-      framework::Tensor cpu_weigth_scales;
-      framework::TensorCopy(*weight_scales, platform::CPUPlace(),
-                            &cpu_weigth_scales);
-      dev_ctx.Wait();
-      const T* weight_scales_data = cpu_weigth_scales.data<T>();
-      range *= (std::pow(2, ativation_bits - 1) - 1);
+    if (scales.size() == 2) {
+      PADDLE_ENFORCE_EQ(
+          scales[1]->numel(), 1,
+          "The second scale tensor should only have one value at now.");
+      max_range *= (std::pow(2, quant_bits[1] - 1) - 1);
       for (int64_t i = 0; i < in->dims()[0]; i++) {
         framework::Tensor one_channel_in = in->Slice(i, i + 1);
         framework::Tensor one_channel_out = out->Slice(i, i + 1);
-        auto max_range = range / weight_scales_data[i];
-        dequant(dev_ctx, &one_channel_in, activation_scale,
+        framework::Tensor one_channel_scale = scales[0]->Slice(i, i + 1);
+        dequant(dev_ctx, &one_channel_in, &one_channel_scale,
                 static_cast<T>(max_range), &one_channel_out);
       }
+      dequant(dev_ctx, out, scales[1], static_cast<T>(1), out);
     } else {
       for (int64_t i = 0; i < in->dims()[0]; i++) {
         framework::Tensor one_channel_in = in->Slice(i, i + 1);
         framework::Tensor one_channel_out = out->Slice(i, i + 1);
-        framework::Tensor one_channel_scale = weight_scales->Slice(i, i + 1);
+        framework::Tensor one_channel_scale = scales[0]->Slice(i, i + 1);
         dequant(dev_ctx, &one_channel_in, &one_channel_scale,
-                static_cast<T>(range), &one_channel_out);
+                static_cast<T>(max_range), &one_channel_out);
       }
     }
   }
......
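The rewritten kernel derives everything from `Scales` and `quant_bits`: the per-channel pass divides by the combined `max_range`, and when a second (per-layer) scale is present it is applied in one extra pass over the whole output with `max_range` fixed to 1. A plain-Python sketch of that branch logic, with a hypothetical `dequant` standing in for `DequantizeFunctor` (which computes `out = in * scale / max_range`):

```python
import numpy as np

def dequant(x, scale, max_range):
    # Stand-in for DequantizeFunctor: out = in * scale / max_range.
    return x * scale / max_range

def fake_channel_wise_dequantize_max_abs(x, scales, quant_bits):
    out = np.empty(x.shape, dtype=np.float64)
    max_range = 2 ** (quant_bits[0] - 1) - 1
    if len(scales) == 2:
        assert scales[1].size == 1, "second scale must hold a single value"
        # Fold both bit widths into one divisor for the per-channel pass.
        max_range *= 2 ** (quant_bits[1] - 1) - 1
        for i in range(x.shape[0]):
            out[i] = dequant(x[i], scales[0][i], max_range)
        # Second pass: apply the per-layer scale with max_range == 1.
        out = dequant(out, scales[1][0], 1)
    else:
        for i in range(x.shape[0]):
            out[i] = dequant(x[i], scales[0][i], max_range)
    return out
```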
python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
@@ -49,53 +49,50 @@ def channel_wise_dequantize_max_abs(x, scales, max_range):
     return y
 
-class TestFakeChannelWiseDequantizeMaxAbsOp(OpTest):
+class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest):
     def set_args(self):
-        self.weight_bits = 8
-        self.activation_bits = 2
+        self.quant_bits = [8, 2]
         self.data_type = "float32"
 
     def setUp(self):
         self.set_args()
         self.op_type = "fake_channel_wise_dequantize_max_abs"
         x = np.random.randn(4, 3, 64, 64).astype(self.data_type)
-        max_range = math.pow(2, self.weight_bits - 1) - 1
+        max_range = math.pow(2, self.quant_bits[0] - 1) - 1
+        max_range *= (math.pow(2, self.quant_bits[1] - 1) - 1)
         yq, scales = channel_wise_quantize_max_abs(x, max_range)
         ydq = channel_wise_dequantize_max_abs(yq, scales, max_range)
 
         self.inputs = {
             'X': yq,
-            'ActivationScale': np.array(1.0).astype(self.data_type),
-            'WeightScales': np.array(scales).astype(self.data_type)
-        }
-        self.attrs = {
-            'weight_bits': self.weight_bits,
-            'activation_bits': self.activation_bits
+            'Scales': [("scales0", np.array(scales).astype(self.data_type)),
+                       ("scales1", np.array([1.0]).astype(self.data_type))]
         }
+        self.attrs = {'quant_bits': self.quant_bits}
         self.outputs = {'Out': ydq}
 
     def test_check_output(self):
         self.check_output()
 
-class TestFakeChannelWiseDequantizeMaxAbsOpNoActivationScale(OpTest):
+class TestFakeChannelWiseDequantizeMaxAbsOpOneScale(OpTest):
     def set_args(self):
-        self.weight_bits = 8
+        self.quant_bits = [8]
         self.data_type = "float32"
 
     def setUp(self):
         self.set_args()
         self.op_type = "fake_channel_wise_dequantize_max_abs"
         x = np.random.randn(4, 3, 64, 64).astype(self.data_type)
-        max_range = math.pow(2, self.weight_bits - 1) - 1
+        max_range = math.pow(2, self.quant_bits[0] - 1) - 1
         yq, scales = channel_wise_quantize_max_abs(x, max_range)
         ydq = channel_wise_dequantize_max_abs(yq, scales, max_range)
 
         self.inputs = {
             'X': yq,
-            'WeightScales': np.array(scales).astype(self.data_type)
+            'Scales': [("scales0", np.array(scales).astype(self.data_type))]
         }
-        self.attrs = {'weight_bits': self.weight_bits}
+        self.attrs = {'quant_bits': self.quant_bits}
         self.outputs = {'Out': ydq}
 
     def test_check_output(self):
......
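The updated tests still rely on the reference helpers `channel_wise_quantize_max_abs` and `channel_wise_dequantize_max_abs`, which are defined earlier in the test file and elided from this hunk. Based only on how they are called above (both take the combined `max_range`), a plausible reconstruction might look like this; the actual bodies in the repository may differ:

```python
import numpy as np

def channel_wise_quantize_max_abs(x, max_range):
    # Assumed behavior: quantize each channel (axis 0) against its own
    # max-abs value, returning rounded codes and the per-channel scales.
    scales = [np.max(np.abs(x[i])).astype(x.dtype) for i in range(x.shape[0])]
    y = np.zeros_like(x)
    for i, scale in enumerate(scales):
        y[i] = np.round(x[i] / scale * max_range)
    return y, scales

def channel_wise_dequantize_max_abs(x, scales, max_range):
    # Inverse mapping: scale the integer codes back to floats per channel.
    y = np.zeros_like(x)
    for i, scale in enumerate(scales):
        y[i] = x[i] * scale / max_range
    return y
```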