From 491b87b4fc78f2585bc20e5ab86df0f204800b32 Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Fri, 24 Jun 2022 17:27:59 +0800 Subject: [PATCH] fix quantization clip and round Attribute (#43764) --- paddle/fluid/operators/fake_quantize_op.cc | 550 ++++++++++++------ paddle/fluid/operators/fake_quantize_op.cu.h | 436 ++++++++------ paddle/fluid/operators/fake_quantize_op.h | 317 +++++----- paddle/fluid/operators/quantize_linear_op.cc | 65 ++- .../post_training_quantization.py | 52 +- .../slim/quantization/quantization_pass.py | 90 +-- .../fluid/contrib/slim/quantization/utils.py | 46 +- ...t_post_training_quantization_lstm_model.py | 16 +- .../test_post_training_quantization_mnist.py | 59 +- ..._post_training_quantization_mobilenetv1.py | 33 +- ...est_post_training_quantization_resnet50.py | 8 +- .../tests/unittests/test_fake_quantize_op.py | 51 +- 12 files changed, 997 insertions(+), 726 deletions(-) diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index 94badfb1c24..61ee9d49ebe 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -33,8 +33,10 @@ struct Compare { template struct FindAbsMaxFunctor { - void operator()(const platform::CPUDeviceContext& ctx, const T* in, - const int num, T* out) { + void operator()(const platform::CPUDeviceContext &ctx, + const T *in, + const int num, + T *out) { *out = std::abs(*(std::max_element(in + 0, in + num, Compare()))); } }; @@ -43,24 +45,26 @@ template struct FindAbsMaxFunctor; template struct FindChannelAbsMaxFunctor { - void operator()(const platform::CPUDeviceContext& ctx, - const framework::Tensor& in_tensor, const int quant_axis, - T* out_abs_max) { + void operator()(const platform::CPUDeviceContext &ctx, + const framework::Tensor &in_tensor, + const int quant_axis, + T *out_abs_max) { // At present, channelwise quantization supports conv2d, depthwise_conv2d // conv2d_transpose and mul PADDLE_ENFORCE_EQ( - quant_axis == 0 || quant_axis == 1, true, + quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " "the received is %d", quant_axis)); - auto* in_data = in_tensor.data(); + auto *in_data = in_tensor.data(); auto in_dims = in_tensor.dims(); const int64_t channel = in_dims[quant_axis]; if (quant_axis == 0) { const int64_t channel_size = in_tensor.numel() / channel; for (int64_t i = 0; i < channel; i++) { - auto* start = in_data + i * channel_size; - auto* end = in_data + (i + 1) * channel_size; + auto *start = in_data + i * channel_size; + auto *end = in_data + (i + 1) * channel_size; out_abs_max[i] = std::abs(*(std::max_element(start, end, Compare()))); } @@ -72,8 +76,8 @@ struct FindChannelAbsMaxFunctor { const int64_t step_j = in_tensor.numel() / (in_dims[0] * in_dims[1]); for (int64_t i = 0; i < in_dims[0]; i++) { for (int64_t j = 0; j < in_dims[1]; j++) { - auto* start = in_data + i * step_i + j * step_j; - auto* end = in_data + i * step_i + (j + 1) * step_j; + auto *start = in_data + i * step_i + j * step_j; + auto *end = in_data + i * step_i + (j + 1) * step_j; T abs_max = std::abs(*(std::max_element(start, end, Compare()))); out_abs_max[j] = std::max(out_abs_max[j], abs_max); } @@ -86,16 +90,30 @@ template struct FindChannelAbsMaxFunctor; template struct ClipAndFakeQuantFunctor { - void operator()(const platform::CPUDeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, - 
framework::Tensor* out) { + void operator()(const platform::CPUDeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + framework::Tensor *out) { T s = scale.data()[0]; T inv_s = inverse(s); platform::Transform trans; - trans(ctx, in.data(), in.data() + in.numel(), - out->mutable_data(ctx.GetPlace()), - QuantTensorFunctor(static_cast(bin_cnt), round_type, inv_s)); + if (round_type == 0) { + trans(ctx, + in.data(), + in.data() + in.numel(), + out->mutable_data(ctx.GetPlace()), + QuantTensorFunctor(static_cast(bin_cnt), inv_s)); + } else { + trans(ctx, + in.data(), + in.data() + in.numel(), + out->mutable_data(ctx.GetPlace()), + phi::ClipFunctor(-s, s)); + auto out_e = framework::EigenVector::Flatten(*out); + out_e.device(*ctx.eigen_device()) = (bin_cnt * inv_s * out_e).round(); + } } }; @@ -103,19 +121,34 @@ template struct ClipAndFakeQuantFunctor; template struct ClipAndFakeQuantDequantFunctor { - void operator()(const platform::CPUDeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, - framework::Tensor* out) { + void operator()(const platform::CPUDeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + framework::Tensor *out) { T s = scale.data()[0]; T inv_s = inverse(s); platform::Transform trans; - trans(ctx, in.data(), in.data() + in.numel(), - out->mutable_data(ctx.GetPlace()), - QuantTensorFunctor(static_cast(bin_cnt), round_type, inv_s)); - auto out_e = framework::EigenVector::Flatten(*out); - out_e.device(*ctx.eigen_device()) = out_e * s / static_cast(bin_cnt); + if (round_type == 0) { + trans(ctx, + in.data(), + in.data() + in.numel(), + out->mutable_data(ctx.GetPlace()), + QuantTensorFunctor(static_cast(bin_cnt), inv_s)); + auto out_e = framework::EigenVector::Flatten(*out); + out_e.device(*ctx.eigen_device()) = out_e * s / static_cast(bin_cnt); + } else { + trans(ctx, + in.data(), + in.data() + in.numel(), + out->mutable_data(ctx.GetPlace()), + phi::ClipFunctor(-s, s)); + auto out_e = framework::EigenVector::Flatten(*out); + out_e.device(*ctx.eigen_device()) = + (bin_cnt * inv_s * out_e).round() * s / static_cast(bin_cnt); + } } }; template struct ClipAndFakeQuantDequantFunctor struct ChannelClipAndFakeQuantFunctor { - void operator()(const platform::CPUDeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor* out) { + void operator()(const platform::CPUDeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + const int quant_axis, + framework::Tensor *out) { // At present, channelwise quantization supports conv2d, depthwise_conv2d // conv2d_transpose and mul PADDLE_ENFORCE_EQ( - quant_axis == 0 || quant_axis == 1, true, + quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " "the received is %d", quant_axis)); - auto* scale_data = scale.data(); - auto* in_data = in.data(); - auto* out_data = out->mutable_data(ctx.GetPlace()); + auto *scale_data = scale.data(); + auto *in_data = in.data(); + auto *out_data = out->mutable_data(ctx.GetPlace()); auto in_dims = in.dims(); const int64_t channel = in_dims[quant_axis]; platform::Transform trans; @@ -144,12 +181,31 @@ struct ChannelClipAndFakeQuantFunctor { const int64_t 
channel_size = in.numel() / channel; for (int64_t i = 0; i < channel; i++) { T s = scale_data[i]; - auto* start = in_data + i * channel_size; - auto* end = in_data + (i + 1) * channel_size; + auto *start = in_data + i * channel_size; + auto *end = in_data + (i + 1) * channel_size; T inv_s = inverse(s); - trans( - ctx, start, end, out_data + i * channel_size, - QuantTensorFunctor(static_cast(bin_cnt), round_type, inv_s)); + if (round_type == 0) { + trans(ctx, + start, + end, + out_data + i * channel_size, + QuantTensorFunctor(static_cast(bin_cnt), inv_s)); + } else { + trans(ctx, + start, + end, + out_data + i * channel_size, + phi::ClipFunctor(-s, s)); + } + } + if (round_type == 1) { + for (int64_t i = 0; i < channel; i++) { + T s = scale_data[i]; + T inv_s = inverse(s); + framework::Tensor one_channel_out = out->Slice(i, i + 1); + auto out_e = framework::EigenVector::Flatten(one_channel_out); + out_e.device(*ctx.eigen_device()) = (bin_cnt * inv_s * out_e).round(); + } } } else if (quant_axis == 1) { const int64_t step_i = in.numel() / in_dims[0]; @@ -158,12 +214,21 @@ struct ChannelClipAndFakeQuantFunctor { for (int j = 0; j < in_dims[1]; j++) { T s = scale_data[j]; T inv_s = inverse(s); - auto* start = in_data + i * step_i + j * step_j; - auto* end = in_data + i * step_i + (j + 1) * step_j; - auto* cur_out_data = out_data + i * step_i + j * step_j; - trans(ctx, start, end, cur_out_data, - QuantTensorFunctor(static_cast(bin_cnt), round_type, - inv_s)); + auto *start = in_data + i * step_i + j * step_j; + auto *end = in_data + i * step_i + (j + 1) * step_j; + auto *cur_out_data = out_data + i * step_i + j * step_j; + if (round_type == 0) { + trans(ctx, + start, + end, + cur_out_data, + QuantTensorFunctor(static_cast(bin_cnt), inv_s)); + } else { + trans(ctx, start, end, cur_out_data, phi::ClipFunctor(-s, s)); + for (int k = 0; k < step_j; k++) { + cur_out_data[k] = std::round(bin_cnt * inv_s * cur_out_data[k]); + } + } } } } @@ -174,19 +239,23 @@ template struct ChannelClipAndFakeQuantFunctor; template struct ChannelClipFakeQuantDequantFunctor { - void operator()(const platform::CPUDeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor* out) { + void operator()(const platform::CPUDeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + const int quant_axis, + framework::Tensor *out) { PADDLE_ENFORCE_EQ( - quant_axis == 0 || quant_axis == 1, true, + quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " "the received is %d", quant_axis)); - auto* scale_data = scale.data(); - auto* in_data = in.data(); - auto* out_data = out->mutable_data(ctx.GetPlace()); + auto *scale_data = scale.data(); + auto *in_data = in.data(); + auto *out_data = out->mutable_data(ctx.GetPlace()); auto in_dims = in.dims(); const int64_t channel = in_dims[quant_axis]; platform::Transform trans; @@ -194,15 +263,35 @@ struct ChannelClipFakeQuantDequantFunctor { const int64_t channel_size = in.numel() / channel; for (int i = 0; i < channel; i++) { T s = scale_data[i]; - auto* start = in_data + i * channel_size; - auto* end = in_data + (i + 1) * channel_size; - T inv_s = inverse(s); - trans( - ctx, start, end, out_data + i * channel_size, - QuantTensorFunctor(static_cast(bin_cnt), round_type, inv_s)); + auto *start = in_data + i * channel_size; + auto *end = in_data + (i + 1) * 
channel_size; + if (round_type == 0) { + T inv_s = inverse(s); + trans(ctx, + start, + end, + out_data + i * channel_size, + QuantTensorFunctor(static_cast(bin_cnt), inv_s)); + } else { + trans(ctx, + start, + end, + out_data + i * channel_size, + phi::ClipFunctor(-s, s)); + } + } + for (int i = 0; i < channel; i++) { + T s = scale_data[i]; framework::Tensor one_channel_out = out->Slice(i, i + 1); auto out_e = framework::EigenVector::Flatten(one_channel_out); - out_e.device(*ctx.eigen_device()) = out_e * s / static_cast(bin_cnt); + if (round_type == 0) { + out_e.device(*ctx.eigen_device()) = + out_e * s / static_cast(bin_cnt); + } else { + T inv_s = inverse(s); + out_e.device(*ctx.eigen_device()) = + (bin_cnt * inv_s * out_e).round() * s / static_cast(bin_cnt); + } } } else if (quant_axis == 1) { const int64_t step_i = in.numel() / in_dims[0]; @@ -211,14 +300,25 @@ struct ChannelClipFakeQuantDequantFunctor { for (int j = 0; j < in_dims[1]; j++) { T s = scale_data[j]; T inv_s = inverse(s); - auto* start = in_data + i * step_i + j * step_j; - auto* end = in_data + i * step_i + (j + 1) * step_j; - auto* cur_out_data = out_data + i * step_i + j * step_j; - trans(ctx, start, end, cur_out_data, - QuantTensorFunctor(static_cast(bin_cnt), round_type, - inv_s)); + auto *start = in_data + i * step_i + j * step_j; + auto *end = in_data + i * step_i + (j + 1) * step_j; + auto *cur_out_data = out_data + i * step_i + j * step_j; + if (round_type == 0) { + trans(ctx, + start, + end, + cur_out_data, + QuantTensorFunctor(static_cast(bin_cnt), inv_s)); + } else { + trans(ctx, start, end, cur_out_data, phi::ClipFunctor(-s, s)); + } for (int k = 0; k < step_j; k++) { - cur_out_data[k] = cur_out_data[k] * s / static_cast(bin_cnt); + if (round_type == 0) { + cur_out_data[k] = cur_out_data[k] * s / static_cast(bin_cnt); + } else { + cur_out_data[k] = std::round(bin_cnt * inv_s * cur_out_data[k]) * + s / static_cast(bin_cnt); + } } } } @@ -230,12 +330,14 @@ template struct ChannelClipFakeQuantDequantFunctor; template struct FindRangeAbsMaxFunctor { - void operator()(const platform::CPUDeviceContext& ctx, - const framework::Tensor& cur_scale, - const framework::Tensor& last_scale, - const framework::Tensor& iter, const int window_size, - framework::Tensor* scales_arr, framework::Tensor* out_scale) { - T* scale_arr = scales_arr->mutable_data(ctx.GetPlace()); + void operator()(const platform::CPUDeviceContext &ctx, + const framework::Tensor &cur_scale, + const framework::Tensor &last_scale, + const framework::Tensor &iter, + const int window_size, + framework::Tensor *scales_arr, + framework::Tensor *out_scale) { + T *scale_arr = scales_arr->mutable_data(ctx.GetPlace()); int64_t it = iter.data()[0]; int idx = it % window_size; T removed = scale_arr[idx]; @@ -247,8 +349,8 @@ struct FindRangeAbsMaxFunctor { max = cur; } else if (fabs(removed - max) < 1e-6) { int size = (it > window_size) ? 
window_size : it; - FindAbsMaxFunctor()(ctx, scale_arr, size, - &max); + FindAbsMaxFunctor()( + ctx, scale_arr, size, &max); } out_scale->mutable_data(ctx.GetPlace())[0] = max; } @@ -258,11 +360,14 @@ template struct FindRangeAbsMaxFunctor; template struct FindMovingAverageAbsMaxFunctor { - void operator()(const platform::CPUDeviceContext& ctx, - const framework::Tensor& in_accum, - const framework::Tensor& in_state, const T* cur_scale, - const float rate, framework::Tensor* out_state, - framework::Tensor* out_accum, framework::Tensor* out_scale) { + void operator()(const platform::CPUDeviceContext &ctx, + const framework::Tensor &in_accum, + const framework::Tensor &in_state, + const T *cur_scale, + const float rate, + framework::Tensor *out_state, + framework::Tensor *out_accum, + framework::Tensor *out_scale) { T accum = in_accum.data()[0]; T state = in_state.data()[0]; T scale = cur_scale[0]; @@ -282,18 +387,22 @@ template struct FindMovingAverageAbsMaxFunctorHasInput("X"), "Input", "X", - "FakeQuantOrWithDequantAbsMaxOp"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", + void InferShape(framework::InferShapeContext *ctx) const override { + OP_INOUT_CHECK( + ctx->HasInput("X"), "Input", "X", "FakeQuantOrWithDequantAbsMaxOp"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), + "Output", + "Out", "FakeQuantOrWithDequantAbsMaxOp"); - OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + OP_INOUT_CHECK(ctx->HasOutput("OutScale"), + "Output", + "OutScale", "FakeQuantOrWithDequantAbsMaxOp"); ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->SetOutputDim("OutScale", {1}); @@ -302,7 +411,7 @@ class FakeQuantOrWithDequantAbsMaxOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.device_context()); @@ -320,8 +429,9 @@ class FakeQuantOrWithDequantAbsMaxOpMaker AddOutput("OutScale", "(Tensor) Current scale"); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) - .AddCustomChecker([](const int& bit_length) { - PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + .AddCustomChecker([](const int &bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, + true, platform::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", @@ -329,18 +439,22 @@ class FakeQuantOrWithDequantAbsMaxOpMaker }); AddAttr( "round_type", - "(int, default 0) The round type of fp32 to int." + "(int, default 1) The round type of fp32 to int." "0: rounding to nearest ties to even. Eg: round(1.5)=2, round(2.5)=2" "1: rounding to nearest ties away from zero. 
Eg: round(1.5)=2, " "round(2.5)=3") - .SetDefault(0) - .AddCustomChecker([](const int& round_type) { - PADDLE_ENFORCE_EQ(round_type >= 0 && round_type <= 1, true, - platform::errors::InvalidArgument( - "'round_type' should be between 0 and 1, but " - "the received is %d", - round_type)); - }); + .SetDefault(1) + .AddCustomChecker([](const int &round_type) { + PADDLE_ENFORCE_EQ( + round_type == 0 || round_type == 1, + true, + platform::errors::InvalidArgument( + "'round_type' should be 0 or 1, 0 rounding to " + "nearest ties to even and 1 is rounding to nearest " + "ties away from zero.but the received is %d", + round_type)); + }) + .AsExtra(); AddComment(R"DOC( This is a Base Op which supports FakeQuantAbsMaxOpMaker and FakeQuantDequantAbsMaxOpMaker. FakeQuantAbsMaxOp operator is used in the dynamic quantization. @@ -363,12 +477,16 @@ class FakeChannelWiseQuantizeAbsMaxOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", - "FakeChannelWiseQuantizeAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", + void InferShape(framework::InferShapeContext *ctx) const override { + OP_INOUT_CHECK( + ctx->HasInput("X"), "Input", "X", "FakeChannelWiseQuantizeAbsMax"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), + "Output", + "Out", "FakeChannelWiseQuantizeAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + OP_INOUT_CHECK(ctx->HasOutput("OutScale"), + "Output", + "OutScale", "FakeChannelWiseQuantizeAbsMax"); int quant_axis = ctx->Attrs().Get("quant_axis"); ctx->SetOutputDim("Out", ctx->GetInputDim("X")); @@ -378,7 +496,7 @@ class FakeChannelWiseQuantizeAbsMaxOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); } @@ -398,8 +516,9 @@ class FakeChannelWiseQuantizeAbsMaxOpMaker "For conv2d, depthwise_conv2d, conv2d_transpose " "and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) - .AddCustomChecker([](const int& quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, true, + .AddCustomChecker([](const int &quant_axis) { + PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument( "'quant_axis' should be 0 or 1, but " "the received is %d", @@ -407,8 +526,9 @@ class FakeChannelWiseQuantizeAbsMaxOpMaker }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) - .AddCustomChecker([](const int& bit_length) { - PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + .AddCustomChecker([](const int &bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, + true, platform::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", @@ -416,18 +536,22 @@ class FakeChannelWiseQuantizeAbsMaxOpMaker }); AddAttr( "round_type", - "(int, default 0) The round type of fp32 to int." + "(int, default 1) The round type of fp32 to int." "0: rounding to nearest ties to even. Eg: round(1.5)=2, round(2.5)=2" "1: rounding to nearest ties away from zero. 
Eg: round(1.5)=2, " "round(2.5)=3") - .SetDefault(0) - .AddCustomChecker([](const int& round_type) { - PADDLE_ENFORCE_EQ(round_type >= 0 && round_type <= 1, true, - platform::errors::InvalidArgument( - "'round_type' should be between 0 and 1, but " - "the received is %d", - round_type)); - }); + .SetDefault(1) + .AddCustomChecker([](const int &round_type) { + PADDLE_ENFORCE_EQ( + round_type == 0 || round_type == 1, + true, + platform::errors::InvalidArgument( + "'round_type' should be 0 or 1, 0 rounding to " + "nearest ties to even and 1 is rounding to nearest " + "ties away from zero.but the received is %d", + round_type)); + }) + .AsExtra(); AddAttr("is_test", "(bool, default false) Set to true for inference only, false " "for training. Some layers may run faster when this is true.") @@ -450,12 +574,18 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxOp public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", + void InferShape(framework::InferShapeContext *ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), + "Input", + "X", "FakeChannelWiseQuantizeDequantizeAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", + OP_INOUT_CHECK(ctx->HasOutput("Out"), + "Output", + "Out", "FakeChannelWiseQuantizeDequantizeAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + OP_INOUT_CHECK(ctx->HasOutput("OutScale"), + "Output", + "OutScale", "FakeChannelWiseQuantizeDequantizeAbsMax"); int quant_axis = ctx->Attrs().Get("quant_axis"); ctx->SetOutputDim("Out", ctx->GetInputDim("X")); @@ -465,7 +595,7 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxOp protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); } @@ -485,8 +615,9 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker "For conv2d, depthwise_conv2d, conv2d_transpose " "and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) - .AddCustomChecker([](const int& quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, true, + .AddCustomChecker([](const int &quant_axis) { + PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument( "'quant_axis' should be 0 or 1, but " "the received is %d", @@ -494,8 +625,9 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) - .AddCustomChecker([](const int& bit_length) { - PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + .AddCustomChecker([](const int &bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, + true, platform::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", @@ -503,18 +635,22 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker }); AddAttr( "round_type", - "(int, default 0) The round type of fp32 to int." + "(int, default 1) The round type of fp32 to int." "0: rounding to nearest ties to even. Eg: round(1.5)=2, round(2.5)=2" "1: rounding to nearest ties away from zero. 
Eg: round(1.5)=2, " "round(2.5)=3") - .SetDefault(0) - .AddCustomChecker([](const int& round_type) { - PADDLE_ENFORCE_EQ(round_type >= 0 && round_type <= 1, true, - platform::errors::InvalidArgument( - "'round_type' should be between 0 and 1, but " - "the received is %d", - round_type)); - }); + .SetDefault(1) + .AddCustomChecker([](const int &round_type) { + PADDLE_ENFORCE_EQ( + round_type == 0 || round_type == 1, + true, + platform::errors::InvalidArgument( + "'round_type' should be 0 or 1, 0 rounding to " + "nearest ties to even and 1 is rounding to nearest " + "ties away from zero.but the received is %d", + round_type)); + }) + .AsExtra(); AddComment(R"DOC( The scale of FakeChannelWiseQuantize operator is a vector. In detail, each channel of the input X has a scale value. @@ -530,17 +666,19 @@ $$0 \leq c \lt \ the\ channel\ number\ of\ X$$ class FakeQuantizeRangeAbsMaxOp : public framework::OperatorWithKernel { public: - FakeQuantizeRangeAbsMaxOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) + FakeQuantizeRangeAbsMaxOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) : OperatorWithKernel(type, inputs, outputs, attrs) {} - void InferShape(framework::InferShapeContext* ctx) const override { + void InferShape(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FakeQuantizeRangeAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", - "FakeQuantizeRangeAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + OP_INOUT_CHECK( + ctx->HasOutput("Out"), "Output", "Out", "FakeQuantizeRangeAbsMax"); + OP_INOUT_CHECK(ctx->HasOutput("OutScale"), + "Output", + "OutScale", "FakeQuantizeRangeAbsMax"); if (ctx->HasOutput("OutScales")) { int window_size = ctx->Attrs().Get("window_size"); @@ -553,7 +691,7 @@ class FakeQuantizeRangeAbsMaxOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.device_context()); @@ -574,8 +712,9 @@ class FakeQuantizeRangeAbsMaxOpMaker .SetDefault(10000); AddAttr("bit_length", "(int, default 8), quantization bit number.") .SetDefault(8) - .AddCustomChecker([](const int& bit_length) { - PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + .AddCustomChecker([](const int &bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, + true, platform::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", @@ -583,18 +722,22 @@ class FakeQuantizeRangeAbsMaxOpMaker }); AddAttr( "round_type", - "(int, default 0) The round type of fp32 to int." + "(int, default 1) The round type of fp32 to int." "0: rounding to nearest ties to even. Eg: round(1.5)=2, round(2.5)=2" "1: rounding to nearest ties away from zero. 
Eg: round(1.5)=2, " "round(2.5)=3") - .SetDefault(0) - .AddCustomChecker([](const int& round_type) { - PADDLE_ENFORCE_EQ(round_type >= 0 && round_type <= 1, true, - platform::errors::InvalidArgument( - "'round_type' should be between 0 and 1, but " - "the received is %d", - round_type)); - }); + .SetDefault(1) + .AddCustomChecker([](const int &round_type) { + PADDLE_ENFORCE_EQ( + round_type == 0 || round_type == 1, + true, + platform::errors::InvalidArgument( + "'round_type' should be 0 or 1, 0 rounding to " + "nearest ties to even and 1 is rounding to nearest " + "ties away from zero.but the received is %d", + round_type)); + }) + .AsExtra(); AddAttr("is_test", "(bool, default false) Set to true for inference only, false " "for training. Some layers may run faster when this is true.") @@ -614,17 +757,24 @@ class FakeQuantOrWithDequantMovingAverageAbsMaxOp : public framework::OperatorWithKernel { public: FakeQuantOrWithDequantMovingAverageAbsMaxOp( - const std::string& type, const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) + const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) : OperatorWithKernel(type, inputs, outputs, attrs) {} - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", + void InferShape(framework::InferShapeContext *ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), + "Input", + "X", "FakeQuantOrWithDequantMovingAverageAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", + OP_INOUT_CHECK(ctx->HasOutput("Out"), + "Output", + "Out", "FakeQuantOrWithDequantMovingAverageAbsMax"); - OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + OP_INOUT_CHECK(ctx->HasOutput("OutScale"), + "Output", + "OutScale", "FakeQuantOrWithDequantMovingAverageAbsMax"); if (ctx->HasOutput("OutState")) { ctx->SetOutputDim("OutState", {1}); @@ -639,7 +789,7 @@ class FakeQuantOrWithDequantMovingAverageAbsMaxOp protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.device_context()); @@ -662,8 +812,9 @@ class FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker .SetDefault(0.9); AddAttr("bit_length", "(int, default 8), quantization bit number.") .SetDefault(8) - .AddCustomChecker([](const int& bit_length) { - PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + .AddCustomChecker([](const int &bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, + true, platform::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", @@ -671,18 +822,22 @@ class FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker }); AddAttr( "round_type", - "(int, default 0) The round type of fp32 to int." + "(int, default 1) The round type of fp32 to int." "0: rounding to nearest ties to even. Eg: round(1.5)=2, round(2.5)=2" "1: rounding to nearest ties away from zero. 
Eg: round(1.5)=2, " "round(2.5)=3") - .SetDefault(0) - .AddCustomChecker([](const int& round_type) { - PADDLE_ENFORCE_EQ(round_type >= 0 && round_type <= 1, true, - platform::errors::InvalidArgument( - "'round_type' should be between 0 and 1, but " - "the received is %d", - round_type)); - }); + .SetDefault(1) + .AddCustomChecker([](const int &round_type) { + PADDLE_ENFORCE_EQ( + round_type == 0 || round_type == 1, + true, + platform::errors::InvalidArgument( + "'round_type' should be 0 or 1, 0 rounding to " + "nearest ties to even and 1 is rounding to nearest " + "ties away from zero.but the received is %d", + round_type)); + }) + .AsExtra(); AddAttr("is_test", "(bool, default false) Set to true for inference only, false " "for training. Some layers may run faster when this is true.") @@ -709,10 +864,12 @@ class MovingAverageAbsMaxScaleOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", - "MovingAverageAbsMaxScale"); - OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + void InferShape(framework::InferShapeContext *ctx) const override { + OP_INOUT_CHECK( + ctx->HasInput("X"), "Input", "X", "MovingAverageAbsMaxScale"); + OP_INOUT_CHECK(ctx->HasOutput("OutScale"), + "Output", + "OutScale", "MovingAverageAbsMaxScale"); if (ctx->HasOutput("OutState")) { @@ -730,7 +887,7 @@ class MovingAverageAbsMaxScaleOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); } @@ -770,19 +927,23 @@ class StrightThroughEstimatorGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { + void InferShape(framework::InferShapeContext *ctx) const override { auto out_grad_name = framework::GradVarName("Out"); auto x_grad_name = framework::GradVarName("X"); - OP_INOUT_CHECK(ctx->HasInput(out_grad_name), "Input", out_grad_name, + OP_INOUT_CHECK(ctx->HasInput(out_grad_name), + "Input", + out_grad_name, "StrightThroughEstimatorGradOp"); - OP_INOUT_CHECK(ctx->HasOutput(x_grad_name), "Output", x_grad_name, + OP_INOUT_CHECK(ctx->HasOutput(x_grad_name), + "Output", + x_grad_name, "StrightThroughEstimatorGradOp"); ctx->SetOutputDim(x_grad_name, ctx->GetInputDim(out_grad_name)); } framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); return framework::OpKernelType(input_data_type, ctx.GetPlace()); @@ -810,7 +971,8 @@ namespace ops = paddle::operators; using CPU = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR( - fake_quantize_abs_max, ops::FakeQuantOrWithDequantAbsMaxOp, + fake_quantize_abs_max, + ops::FakeQuantOrWithDequantAbsMaxOp, ops::FakeQuantOrWithDequantAbsMaxOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); @@ -818,7 +980,8 @@ REGISTER_OP_CPU_KERNEL(fake_quantize_abs_max, ops::FakeQuantizeAbsMaxKernel); REGISTER_OPERATOR( - fake_quantize_dequantize_abs_max, ops::FakeQuantOrWithDequantAbsMaxOp, + 
fake_quantize_dequantize_abs_max, + ops::FakeQuantOrWithDequantAbsMaxOp, ops::FakeQuantOrWithDequantAbsMaxOpMaker, ops::StrightThroughEstimatorMaker, ops::StrightThroughEstimatorMaker); @@ -826,7 +989,8 @@ REGISTER_OP_CPU_KERNEL(fake_quantize_dequantize_abs_max, ops::FakeQuantizeDequantizeAbsMaxKernel); REGISTER_OPERATOR( - fake_quantize_range_abs_max, ops::FakeQuantizeRangeAbsMaxOp, + fake_quantize_range_abs_max, + ops::FakeQuantizeRangeAbsMaxOp, ops::FakeQuantizeRangeAbsMaxOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); @@ -853,7 +1017,8 @@ REGISTER_OP_CPU_KERNEL( ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel); REGISTER_OPERATOR( - fake_channel_wise_quantize_abs_max, ops::FakeChannelWiseQuantizeAbsMaxOp, + fake_channel_wise_quantize_abs_max, + ops::FakeChannelWiseQuantizeAbsMaxOp, ops::FakeChannelWiseQuantizeAbsMaxOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); @@ -861,7 +1026,8 @@ REGISTER_OP_CPU_KERNEL(fake_channel_wise_quantize_abs_max, ops::FakeChannelWiseQuantizeAbsMaxKernel); REGISTER_OPERATOR( - moving_average_abs_max_scale, ops::MovingAverageAbsMaxScaleOp, + moving_average_abs_max_scale, + ops::MovingAverageAbsMaxScaleOp, ops::MovingAverageAbsMaxScaleOpMaker, ops::StrightThroughEstimatorMaker, ops::StrightThroughEstimatorMaker); diff --git a/paddle/fluid/operators/fake_quantize_op.cu.h b/paddle/fluid/operators/fake_quantize_op.cu.h index 46aa3fbfe3b..3b1877f2bc8 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu.h +++ b/paddle/fluid/operators/fake_quantize_op.cu.h @@ -36,12 +36,12 @@ struct QuantizeDataType { }; template -__global__ void FindAbsMaxKernel(const T* in, const int n, T* out) { +__global__ void FindAbsMaxKernel(const T *in, const int n, T *out) { int bid = threadIdx.x + blockIdx.x * blockDim.x; int tid = threadIdx.x; - extern __shared__ char* shared_max_data_tmp[]; - auto shared_max_data = reinterpret_cast(shared_max_data_tmp); + extern __shared__ char *shared_max_data_tmp[]; + auto shared_max_data = reinterpret_cast(shared_max_data_tmp); if (gridDim.x > 1) { T local_max_data = T(0); for (int i = bid; i < n; i += blockDim.x * gridDim.x) { @@ -73,14 +73,16 @@ __global__ void FindAbsMaxKernel(const T* in, const int n, T* out) { template struct FindAbsMaxFunctor { - void operator()(const platform::CUDADeviceContext& ctx, const T* in, - const int num, T* out) { + void operator()(const platform::CUDADeviceContext &ctx, + const T *in, + const int num, + T *out) { int block = 1024; int grid = (block - 1 + num) / block; grid = (grid > block) ? 
block : grid; framework::Tensor max; - T* max_data = max.mutable_data(phi::make_ddim({grid}), ctx.GetPlace()); + T *max_data = max.mutable_data(phi::make_ddim({grid}), ctx.GetPlace()); FindAbsMaxKernel <<>>(in, num, max_data); FindAbsMaxKernel @@ -93,13 +95,15 @@ template struct FindAbsMaxFunctor; template -__global__ void FindChannelAbsMaxKernelQuantAxis0(const T* in, const int n, - const int c, T* out) { +__global__ void FindChannelAbsMaxKernelQuantAxis0(const T *in, + const int n, + const int c, + T *out) { int tid = threadIdx.x; int channel_size = n / c; - const T* in_c = in + blockIdx.x * channel_size; - extern __shared__ char* shared_max_data_tmp[]; - auto shared_max_data = reinterpret_cast(shared_max_data_tmp); + const T *in_c = in + blockIdx.x * channel_size; + extern __shared__ char *shared_max_data_tmp[]; + auto shared_max_data = reinterpret_cast(shared_max_data_tmp); T local_max_data = T(0); for (int i = tid; i < channel_size; i += blockDim.x) { T tmp = static_cast( @@ -122,17 +126,16 @@ __global__ void FindChannelAbsMaxKernelQuantAxis0(const T* in, const int n, } template -__global__ void FindChannelAbsMaxKernelQuantAxis1(const T* in, const int n, - const int cin, const int cout, - T* out) { - extern __shared__ char* shared_max_data_tmp[]; - auto shared_max_data = reinterpret_cast(shared_max_data_tmp); +__global__ void FindChannelAbsMaxKernelQuantAxis1( + const T *in, const int n, const int cin, const int cout, T *out) { + extern __shared__ char *shared_max_data_tmp[]; + auto shared_max_data = reinterpret_cast(shared_max_data_tmp); int cout_wh_size = n / cin; int wh_size = n / (cin * cout); int tid = threadIdx.x; int bid = blockIdx.x; - const T* in_current = in + tid * cout_wh_size + bid * wh_size; + const T *in_current = in + tid * cout_wh_size + bid * wh_size; T local_max_data = T(0); for (int i = 0; i < wh_size; i++) { T tmp = static_cast( @@ -162,24 +165,26 @@ __global__ void FindChannelAbsMaxKernelQuantAxis1(const T* in, const int n, template struct FindChannelAbsMaxFunctor { - void operator()(const platform::CUDADeviceContext& ctx, - const framework::Tensor& in_tensor, const int quant_axis, - T* out_abs_max) { + void operator()(const platform::CUDADeviceContext &ctx, + const framework::Tensor &in_tensor, + const int quant_axis, + T *out_abs_max) { PADDLE_ENFORCE_EQ( - quant_axis == 0 || quant_axis == 1, true, + quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " "the received is %d", quant_axis)); const int num = in_tensor.numel(); auto in_dims = in_tensor.dims(); - const T* in_data = in_tensor.data(); + const T *in_data = in_tensor.data(); if (quant_axis == 0) { int cout = in_dims[0]; int grid = cout; int block = 1024; FindChannelAbsMaxKernelQuantAxis0 - <<>>(in_data, num, cout, - out_abs_max); + <<>>( + in_data, num, cout, out_abs_max); } else if (quant_axis == 1) { int cin = in_dims[0]; int cout = in_dims[1]; @@ -213,9 +218,12 @@ struct FindChannelAbsMaxFunctor { template struct FindChannelAbsMaxFunctor; template -__global__ void ClipAndQuantKernel(const T* in, const T* scale, - const int bin_cnt, const int round_type, - const int n, T* out) { +__global__ void ClipAndQuantKernel(const T *in, + const T *scale, + const int bin_cnt, + const int round_type, + const int n, + T *out) { int bid = threadIdx.x + blockIdx.x * blockDim.x; int tid = threadIdx.x; @@ -227,25 +235,30 @@ __global__ void ClipAndQuantKernel(const T* in, const T* scale, for (int i = bid; i < n; i += blockDim.x * gridDim.x) { ComputeDataType 
x = static_cast(in[i]); - x = bin_cnt_t * inv_s * x; if (round_type == 0) { + x = bin_cnt_t * inv_s * x; x = roundWithTiesToEven(x); + ComputeDataType max_bound = bin_cnt_t; + ComputeDataType min_bound = -bin_cnt_t - static_cast(1); + x = x > max_bound ? max_bound : x; + x = x < min_bound ? min_bound : x; + out[i] = static_cast(x); } else { - x = round(x); + ComputeDataType v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt_t * inv_s * v; + out[i] = static_cast(round(v)); } - ComputeDataType max_bound = bin_cnt_t; - ComputeDataType min_bound = -bin_cnt_t - static_cast(1); - x = x > max_bound ? max_bound : x; - x = x < min_bound ? min_bound : x; - out[i] = static_cast(x); } } template -__global__ void ClipAndQuantDequantKernel(const T* in, const T* scale, +__global__ void ClipAndQuantDequantKernel(const T *in, + const T *scale, const int bin_cnt, - const int round_type, const int n, - T* out) { + const int round_type, + const int n, + T *out) { int bid = threadIdx.x + blockIdx.x * blockDim.x; int tid = threadIdx.x; @@ -257,33 +270,39 @@ __global__ void ClipAndQuantDequantKernel(const T* in, const T* scale, for (int i = bid; i < n; i += blockDim.x * gridDim.x) { ComputeDataType x = static_cast(in[i]); - x = bin_cnt_t * inv_s * x; if (round_type == 0) { + x = bin_cnt_t * inv_s * x; x = roundWithTiesToEven(x); + ComputeDataType max_bound = bin_cnt_t; + ComputeDataType min_bound = -bin_cnt_t - static_cast(1); + x = x > max_bound ? max_bound : x; + x = x < min_bound ? min_bound : x; + out[i] = static_cast((x * s) / bin_cnt_t); } else { + x = x > s ? s : x; + x = x < -s ? -s : x; + x = bin_cnt_t * inv_s * x; x = round(x); + out[i] = static_cast((x * s) / bin_cnt_t); } - ComputeDataType max_bound = bin_cnt_t; - ComputeDataType min_bound = -bin_cnt_t - static_cast(1); - x = x > max_bound ? max_bound : x; - x = x < min_bound ? 
min_bound : x; - out[i] = static_cast((x * s) / bin_cnt_t); } } template struct ClipAndFakeQuantFunctor { - void operator()(const platform::CUDADeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, - framework::Tensor* out) { + void operator()(const platform::CUDADeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + framework::Tensor *out) { int num = in.numel(); int block = 1024; int grid = (block - 1 + num) / block; - const T* in_data = in.data(); - const T* scale_data = scale.data(); - T* out_data = out->mutable_data(ctx.GetPlace()); + const T *in_data = in.data(); + const T *scale_data = scale.data(); + T *out_data = out->mutable_data(ctx.GetPlace()); ClipAndQuantKernel<<>>( in_data, scale_data, bin_cnt, round_type, num, out_data); @@ -294,17 +313,19 @@ template struct ClipAndFakeQuantFunctor; template struct ClipAndFakeQuantDequantFunctor { - void operator()(const platform::CUDADeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, - framework::Tensor* out) { + void operator()(const platform::CUDADeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + framework::Tensor *out) { int num = in.numel(); int block = 1024; int grid = (block - 1 + num) / block; - const T* in_data = in.data(); - const T* scale_data = scale.data(); - T* out_data = out->mutable_data(ctx.GetPlace()); + const T *in_data = in.data(); + const T *scale_data = scale.data(); + T *out_data = out->mutable_data(ctx.GetPlace()); ClipAndQuantDequantKernel<<>>( in_data, scale_data, bin_cnt, round_type, num, out_data); @@ -313,16 +334,18 @@ struct ClipAndFakeQuantDequantFunctor { // ChannelClipAndQuantKernel for quant_axis is 0 template -__global__ void ChannelClipAndQuantKernelQuantAxis0(const T* in, const T* scale, +__global__ void ChannelClipAndQuantKernelQuantAxis0(const T *in, + const T *scale, const int bin_cnt, const int round_type, const int64_t n, - const int c, T* out) { + const int c, + T *out) { int tid = threadIdx.x; int64_t channel_size = n / c; - const T* in_c = in + blockIdx.x * channel_size; - T* out_c = out + blockIdx.x * channel_size; + const T *in_c = in + blockIdx.x * channel_size; + T *out_c = out + blockIdx.x * channel_size; using ComputeDataType = typename QuantizeDataType::type; @@ -332,25 +355,33 @@ __global__ void ChannelClipAndQuantKernelQuantAxis0(const T* in, const T* scale, for (int64_t i = tid; i < channel_size; i += blockDim.x) { ComputeDataType x = static_cast(in_c[i]); - x = bin_cnt_t * inv_s * x; if (round_type == 0) { + x = bin_cnt_t * inv_s * x; x = roundWithTiesToEven(x); + ComputeDataType max_bound = bin_cnt_t; + ComputeDataType min_bound = -bin_cnt_t - static_cast(1); + x = x > max_bound ? max_bound : x; + x = x < min_bound ? min_bound : x; + out_c[i] = static_cast(x); } else { - x = round(x); + ComputeDataType v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt_t * inv_s * v; + out_c[i] = static_cast(round(v)); } - ComputeDataType max_bound = bin_cnt_t; - ComputeDataType min_bound = -bin_cnt_t - static_cast(1); - x = x > max_bound ? max_bound : x; - x = x < min_bound ? 
min_bound : x; - out_c[i] = static_cast(x); } } // ChannelClipAndQuantKernel for quant_axis is N template -__global__ void ChannelClipAndQuantKernelQuantAxisN( - const T* in, const T* scale, const int bin_cnt, const int round_type, - const int64_t n, const int nScale, const int quant_stride, T* out) { +__global__ void ChannelClipAndQuantKernelQuantAxisN(const T *in, + const T *scale, + const int bin_cnt, + const int round_type, + const int64_t n, + const int nScale, + const int quant_stride, + T *out) { int64_t idx = blockDim.x * blockIdx.x + threadIdx.x; using ComputeDataType = typename QuantizeDataType::type; ComputeDataType bin_cnt_t = static_cast(bin_cnt); @@ -359,37 +390,44 @@ __global__ void ChannelClipAndQuantKernelQuantAxisN( static_cast(scale[(i / quant_stride) % nScale]); ComputeDataType inv_s = inverse(s); ComputeDataType x = static_cast(in[i]); - x = bin_cnt_t * inv_s * x; if (round_type == 0) { + x = bin_cnt_t * inv_s * x; x = roundWithTiesToEven(x); + ComputeDataType max_bound = bin_cnt_t; + ComputeDataType min_bound = -bin_cnt_t - static_cast(1); + x = x > max_bound ? max_bound : x; + x = x < min_bound ? min_bound : x; + out[i] = static_cast(x); } else { - x = round(x); + ComputeDataType v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt_t * inv_s * v; + out[i] = static_cast(round(v)); } - ComputeDataType max_bound = bin_cnt_t; - ComputeDataType min_bound = -bin_cnt_t - static_cast(1); - x = x > max_bound ? max_bound : x; - x = x < min_bound ? min_bound : x; - out[i] = static_cast(x); } } template struct ChannelClipAndFakeQuantFunctor { - void operator()(const platform::CUDADeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor* out) { + void operator()(const platform::CUDADeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + const int quant_axis, + framework::Tensor *out) { PADDLE_ENFORCE_EQ( - quant_axis == 0 || quant_axis == 1, true, + quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " "the received is %d", quant_axis)); int64_t num = in.numel(); auto in_dims = in.dims(); - const T* in_data = in.data(); - const T* scale_data = scale.data(); - T* out_data = out->mutable_data(ctx.GetPlace()); + const T *in_data = in.data(); + const T *scale_data = scale.data(); + T *out_data = out->mutable_data(ctx.GetPlace()); if (quant_axis == 0) { int grid = in_dims[0]; @@ -411,9 +449,15 @@ struct ChannelClipAndFakeQuantFunctor { const int64_t grid_size = std::min(max_blocks, (num + block_size - 1) / block_size); - ChannelClipAndQuantKernelQuantAxisN<<>>( - in_data, scale_data, bin_cnt, round_type, num, in_dims[quant_axis], - quant_stride, out_data); + ChannelClipAndQuantKernelQuantAxisN + <<>>(in_data, + scale_data, + bin_cnt, + round_type, + num, + in_dims[quant_axis], + quant_stride, + out_data); } } }; @@ -422,12 +466,14 @@ template struct ChannelClipAndFakeQuantFunctor; template -__global__ void FindRangeAbsMaxAndFillArray(const T* cur_scale, - const T* last_scale, - const int64_t* iter, - const int window_size, T* scale_arr, - T* out_scale, int* need_find_max, - int* out_size) { +__global__ void FindRangeAbsMaxAndFillArray(const T *cur_scale, + const T *last_scale, + const int64_t *iter, + const int window_size, + T *scale_arr, + T *out_scale, + int *need_find_max, + int *out_size) { int it = iter[0]; int idx = it % 
window_size; T removed = scale_arr[idx]; @@ -446,45 +492,63 @@ __global__ void FindRangeAbsMaxAndFillArray(const T* cur_scale, template struct FindRangeAbsMaxFunctor { - void operator()(const platform::CUDADeviceContext& ctx, - const framework::Tensor& cur_scale, - const framework::Tensor& last_scale, - const framework::Tensor& iter, const int window_size, - framework::Tensor* scales_arr, framework::Tensor* out_scale) { + void operator()(const platform::CUDADeviceContext &ctx, + const framework::Tensor &cur_scale, + const framework::Tensor &last_scale, + const framework::Tensor &iter, + const int window_size, + framework::Tensor *scales_arr, + framework::Tensor *out_scale) { const auto gpu_place = ctx.GetPlace(); - T* scale_arr = scales_arr->mutable_data(gpu_place); - T* out_scale_data = out_scale->mutable_data(gpu_place); + T *scale_arr = scales_arr->mutable_data(gpu_place); + T *out_scale_data = out_scale->mutable_data(gpu_place); framework::Tensor need_find_max, out_size; - int* find_max = need_find_max.mutable_data({1}, gpu_place); - int* out_size_data = out_size.mutable_data({1}, gpu_place); - - FindRangeAbsMaxAndFillArray<<<1, 1, 0, ctx.stream()>>>( - cur_scale.data(), last_scale.data(), iter.data(), - window_size, scale_arr, out_scale_data, find_max, out_size_data); + int *find_max = need_find_max.mutable_data({1}, gpu_place); + int *out_size_data = out_size.mutable_data({1}, gpu_place); + + FindRangeAbsMaxAndFillArray + <<<1, 1, 0, ctx.stream()>>>(cur_scale.data(), + last_scale.data(), + iter.data(), + window_size, + scale_arr, + out_scale_data, + find_max, + out_size_data); int g_find_max; - memory::Copy(platform::CPUPlace(), &g_find_max, gpu_place, find_max, - sizeof(int), ctx.stream()); + memory::Copy(platform::CPUPlace(), + &g_find_max, + gpu_place, + find_max, + sizeof(int), + ctx.stream()); ctx.Wait(); if (g_find_max) { int len; - memory::Copy(platform::CPUPlace(), &len, gpu_place, out_size_data, - sizeof(int), ctx.stream()); + memory::Copy(platform::CPUPlace(), + &len, + gpu_place, + out_size_data, + sizeof(int), + ctx.stream()); ctx.Wait(); - FindAbsMaxFunctor()(ctx, scale_arr, len, - out_scale_data); + FindAbsMaxFunctor()( + ctx, scale_arr, len, out_scale_data); } } }; template -__global__ void FindMovingAverageAbsMaxKernel(const T* in_state, - const T* in_accum, - const T* cur_scale, const T rate, - T* out_state, T* out_accum, - T* out_scale) { +__global__ void FindMovingAverageAbsMaxKernel(const T *in_state, + const T *in_accum, + const T *cur_scale, + const T rate, + T *out_state, + T *out_accum, + T *out_scale) { T state = rate * (*in_state) + T(1.0f); T accum = rate * (*in_accum) + (*cur_scale); *out_state = state; @@ -496,92 +560,119 @@ template struct FindRangeAbsMaxFunctor; template struct FindMovingAverageAbsMaxFunctor { - void operator()(const platform::CUDADeviceContext& ctx, - const framework::Tensor& in_accum, - const framework::Tensor& in_state, const T* cur_scale, - const float rate, framework::Tensor* out_state, - framework::Tensor* out_accum, framework::Tensor* out_scale) { + void operator()(const platform::CUDADeviceContext &ctx, + const framework::Tensor &in_accum, + const framework::Tensor &in_state, + const T *cur_scale, + const float rate, + framework::Tensor *out_state, + framework::Tensor *out_accum, + framework::Tensor *out_scale) { const auto gpu_place = ctx.GetPlace(); T rate_t = static_cast(rate); - T* out_state_data = out_state->mutable_data(gpu_place); - T* out_accum_data = out_accum->mutable_data(gpu_place); - T* out_scale_data = 
out_scale->mutable_data(gpu_place); - - FindMovingAverageAbsMaxKernel<<<1, 1, 0, ctx.stream()>>>( - in_state.data(), in_accum.data(), cur_scale, rate_t, - out_state_data, out_accum_data, out_scale_data); + T *out_state_data = out_state->mutable_data(gpu_place); + T *out_accum_data = out_accum->mutable_data(gpu_place); + T *out_scale_data = out_scale->mutable_data(gpu_place); + + FindMovingAverageAbsMaxKernel + <<<1, 1, 0, ctx.stream()>>>(in_state.data(), + in_accum.data(), + cur_scale, + rate_t, + out_state_data, + out_accum_data, + out_scale_data); } }; // ChannelClipAndQuantDequantKernel for quant_axis is 0 template -__global__ void ChannelClipAndQuantDequantKernelQuantAxis0( - const T* in, const T* scale, const int bin_cnt, const int round_type, - const int n, const int c, T* out) { +__global__ void ChannelClipAndQuantDequantKernelQuantAxis0(const T *in, + const T *scale, + const int bin_cnt, + const int round_type, + const int n, + const int c, + T *out) { int tid = threadIdx.x; int channel_size = n / c; - const T* in_c = in + blockIdx.x * channel_size; - T* out_c = out + blockIdx.x * channel_size; + const T *in_c = in + blockIdx.x * channel_size; + T *out_c = out + blockIdx.x * channel_size; T s = scale[blockIdx.x]; T inv_s = inverse(s); for (int i = tid; i < channel_size; i += blockDim.x) { T x = in_c[i]; - x = bin_cnt * inv_s * x; if (round_type == 0) { + x = bin_cnt * inv_s * x; x = roundWithTiesToEven(x); + T max_bound = bin_cnt; + T min_bound = -bin_cnt - static_cast(1); + x = x > max_bound ? max_bound : x; + x = x < min_bound ? min_bound : x; + out_c[i] = (x * s) / bin_cnt; } else { - x = round(x); + T v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt * inv_s * v; + out_c[i] = round(v) * s / bin_cnt; } - T max_bound = bin_cnt; - T min_bound = -bin_cnt - static_cast(1); - x = x > max_bound ? max_bound : x; - x = x < min_bound ? min_bound : x; - out_c[i] = (x * s) / bin_cnt; } } // ChannelClipAndQuantDequantKernel for quant_axis is 1 template -__global__ void ChannelClipAndQuantDequantKernelQuantAxis1( - const T* in, const T* scale, const int bin_cnt, const int round_type, - const int n, const int cin, const int cout, T* out) { +__global__ void ChannelClipAndQuantDequantKernelQuantAxis1(const T *in, + const T *scale, + const int bin_cnt, + const int round_type, + const int n, + const int cin, + const int cout, + T *out) { T s = scale[blockIdx.x % cout]; T inv_s = inverse(s); int wh_size = n / (cin * cout); - const T* in_c = in + blockIdx.x * wh_size; - T* out_c = out + blockIdx.x * wh_size; + const T *in_c = in + blockIdx.x * wh_size; + T *out_c = out + blockIdx.x * wh_size; for (int i = threadIdx.x; i < wh_size; i += blockDim.x) { T x = in_c[i]; - x = bin_cnt * inv_s * x; if (round_type == 0) { + x = bin_cnt * inv_s * x; x = roundWithTiesToEven(x); + T max_bound = bin_cnt; + T min_bound = -bin_cnt - static_cast(1); + x = x > max_bound ? max_bound : x; + x = x < min_bound ? min_bound : x; + out_c[i] = (x * s) / bin_cnt; } else { - x = round(x); + T v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt * inv_s * v; + out_c[i] = round(v) * s / bin_cnt; } - T max_bound = bin_cnt; - T min_bound = -bin_cnt - static_cast(1); - x = x > max_bound ? max_bound : x; - x = x < min_bound ? 
min_bound : x; - out_c[i] = (x * s) / bin_cnt; } } template struct ChannelClipFakeQuantDequantFunctor { - void operator()(const platform::CUDADeviceContext& ctx, - const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor* out) { + void operator()(const platform::CUDADeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + const int quant_axis, + framework::Tensor *out) { // At present, channelwise quantization supports conv2d, depthwise_conv2d // conv2d_transpose and mul PADDLE_ENFORCE_EQ( - quant_axis == 0 || quant_axis == 1, true, + quant_axis == 0 || quant_axis == 1, + true, platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " "the received is %d", quant_axis)); @@ -589,25 +680,34 @@ struct ChannelClipFakeQuantDequantFunctor { int num = in.numel(); auto in_dims = in.dims(); - const T* in_data = in.data(); - const T* scale_data = scale.data(); - T* out_data = out->mutable_data(ctx.GetPlace()); + const T *in_data = in.data(); + const T *scale_data = scale.data(); + T *out_data = out->mutable_data(ctx.GetPlace()); if (quant_axis == 0) { int grid = in_dims[0]; int block = 1024; ChannelClipAndQuantDequantKernelQuantAxis0 - <<>>(in_data, scale_data, bin_cnt, - round_type, num, in_dims[0], + <<>>(in_data, + scale_data, + bin_cnt, + round_type, + num, + in_dims[0], out_data); } else if (quant_axis == 1) { int grid = in_dims[0] * in_dims[1]; int block = 1024; ChannelClipAndQuantDequantKernelQuantAxis1 - <<>>(in_data, scale_data, bin_cnt, - round_type, num, in_dims[0], - in_dims[1], out_data); + <<>>(in_data, + scale_data, + bin_cnt, + round_type, + num, + in_dims[0], + in_dims[1], + out_data); } } }; diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 2956478f44a..6931ac4325b 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -51,16 +51,11 @@ inline HOSTDEVICE T roundWithTiesToEven(T x) { template class QuantTensorFunctor { public: - explicit QuantTensorFunctor(const T bin_cnt, const int round_type, - const T inv_s) - : bin_cnt_(bin_cnt), round_type_(round_type), inv_s_(inv_s) {} + explicit QuantTensorFunctor(const T bin_cnt, const T inv_s) + : bin_cnt_(bin_cnt), inv_s_(inv_s) {} HOSTDEVICE T operator()(const T x) const { T out = bin_cnt_ * inv_s_ * x; - if (round_type_ == 0) { - out = roundWithTiesToEven(out); - } else if (round_type_ == 1) { - out = std::round(out); - } + out = roundWithTiesToEven(out); T max_bound = bin_cnt_; T min_bound = -bin_cnt_ - static_cast(1); out = out > max_bound ? 
max_bound : out; @@ -70,82 +65,101 @@ class QuantTensorFunctor { private: T bin_cnt_; - int round_type_; T inv_s_; }; template struct FindAbsMaxFunctor { - void operator()(const DeviceContext& ctx, const T* in, const int num, T* out); + void operator()(const DeviceContext &ctx, const T *in, const int num, T *out); }; template struct ClipAndFakeQuantFunctor { - void operator()(const DeviceContext& ctx, const framework::Tensor& in, - const framework::Tensor& scale, const int bin_cnt, - const int round_type, framework::Tensor* out); + void operator()(const DeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + framework::Tensor *out); }; template struct ClipAndFakeQuantDequantFunctor { - void operator()(const DeviceContext& ctx, const framework::Tensor& in, - const framework::Tensor& scale, const int bin_cnt, - int round_type, framework::Tensor* out); + void operator()(const DeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + int round_type, + framework::Tensor *out); }; template struct FindRangeAbsMaxFunctor { - void operator()(const DeviceContext& ctx, const framework::Tensor& cur_scale, - const framework::Tensor& last_scale, - const framework::Tensor& iter, const int window_size, - framework::Tensor* scales_arr, framework::Tensor* out_scale); + void operator()(const DeviceContext &ctx, + const framework::Tensor &cur_scale, + const framework::Tensor &last_scale, + const framework::Tensor &iter, + const int window_size, + framework::Tensor *scales_arr, + framework::Tensor *out_scale); }; template struct FindChannelAbsMaxFunctor { - void operator()(const DeviceContext& ctx, const framework::Tensor& in_tensor, - const int quant_axis, T* out_abs_max); + void operator()(const DeviceContext &ctx, + const framework::Tensor &in_tensor, + const int quant_axis, + T *out_abs_max); }; template struct ChannelClipAndFakeQuantFunctor { - void operator()(const DeviceContext& ctx, const framework::Tensor& in, - const framework::Tensor& scale, const int bin_cnt, - const int round_type, const int quant_axis, - framework::Tensor* out); + void operator()(const DeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + const int round_type, + const int quant_axis, + framework::Tensor *out); }; template struct ChannelClipFakeQuantDequantFunctor { - void operator()(const DeviceContext& ctx, const framework::Tensor& in, - const framework::Tensor& scale, const int bin_cnt, - int round_type, const int quant_axis, framework::Tensor* out); + void operator()(const DeviceContext &ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + const int bin_cnt, + int round_type, + const int quant_axis, + framework::Tensor *out); }; template struct FindMovingAverageAbsMaxFunctor { - void operator()(const DeviceContext& ctx, const framework::Tensor& in_accum, - const framework::Tensor& in_state, - const framework::Tensor& cur_scale, - framework::Tensor* out_state, framework::Tensor* out_accum, - framework::Tensor* out_scale); + void operator()(const DeviceContext &ctx, + const framework::Tensor &in_accum, + const framework::Tensor &in_state, + const framework::Tensor &cur_scale, + framework::Tensor *out_state, + framework::Tensor *out_accum, + framework::Tensor *out_scale); }; template class FakeAbsMaxKernelBase : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in 
= context.Input("X"); - auto* out = context.Output("Out"); - auto* out_scale = context.Output("OutScale"); - T* out_s = out_scale->mutable_data(context.GetPlace()); + void Compute(const framework::ExecutionContext &context) const override { + auto *in = context.Input("X"); + auto *out = context.Output("Out"); + auto *out_scale = context.Output("OutScale"); + T *out_s = out_scale->mutable_data(context.GetPlace()); int bit_length = context.Attr("bit_length"); int round_type = context.Attr("round_type"); int bin_cnt = std::pow(2, bit_length - 1) - 1; - auto& dev_ctx = context.template device_context(); - const T* in_data = in->data(); + auto &dev_ctx = context.template device_context(); + const T *in_data = in->data(); FindAbsMaxFunctor()(dev_ctx, in_data, in->numel(), out_s); RunClipFunctor(dev_ctx, *in, *out_scale, bin_cnt, round_type, out); } @@ -153,20 +167,25 @@ class FakeAbsMaxKernelBase : public framework::OpKernel { virtual ~FakeAbsMaxKernelBase() = default; protected: - virtual void RunClipFunctor(const DeviceContext& dev_ctx, - const framework::Tensor& in, - const framework::Tensor& scale, int bin_cnt, - int round_type, framework::Tensor* out) const = 0; + virtual void RunClipFunctor(const DeviceContext &dev_ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + int bin_cnt, + int round_type, + framework::Tensor *out) const = 0; }; template class FakeQuantizeAbsMaxKernel : public FakeAbsMaxKernelBase { protected: - void RunClipFunctor(const DeviceContext& dev_ctx, const framework::Tensor& in, - const framework::Tensor& scale, int bin_cnt, - int round_type, framework::Tensor* out) const override { - ClipAndFakeQuantFunctor()(dev_ctx, in, scale, bin_cnt, - round_type, out); + void RunClipFunctor(const DeviceContext &dev_ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + int bin_cnt, + int round_type, + framework::Tensor *out) const override { + ClipAndFakeQuantFunctor()( + dev_ctx, in, scale, bin_cnt, round_type, out); } }; @@ -174,9 +193,12 @@ template class FakeQuantizeDequantizeAbsMaxKernel : public FakeAbsMaxKernelBase { protected: - void RunClipFunctor(const DeviceContext& dev_ctx, const framework::Tensor& in, - const framework::Tensor& scale, int bin_cnt, - int round_type, framework::Tensor* out) const override { + void RunClipFunctor(const DeviceContext &dev_ctx, + const framework::Tensor &in, + const framework::Tensor &scale, + int bin_cnt, + int round_type, + framework::Tensor *out) const override { ClipAndFakeQuantDequantFunctor()( dev_ctx, in, scale, bin_cnt, round_type, out); } @@ -185,11 +207,11 @@ class FakeQuantizeDequantizeAbsMaxKernel template class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); + void Compute(const framework::ExecutionContext &context) const override { + auto *in = context.Input("X"); - auto* out = context.Output("Out"); - auto* out_scale = context.Output("OutScale"); + auto *out = context.Output("Out"); + auto *out_scale = context.Output("OutScale"); out->mutable_data(context.GetPlace()); int bit_length = context.Attr("bit_length"); @@ -198,11 +220,11 @@ class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel { int quant_axis = context.Attr("quant_axis"); bool is_test = context.Attr("is_test"); - auto& dev_ctx = context.template device_context(); + auto &dev_ctx = context.template device_context(); if (!is_test) { - T* out_scale_data = 
out_scale->mutable_data(context.GetPlace()); - FindChannelAbsMaxFunctor()(dev_ctx, *in, quant_axis, - out_scale_data); + T *out_scale_data = out_scale->mutable_data(context.GetPlace()); + FindChannelAbsMaxFunctor()( + dev_ctx, *in, quant_axis, out_scale_data); } ChannelClipAndFakeQuantFunctor()( dev_ctx, *in, *out_scale, bin_cnt, round_type, quant_axis, out); @@ -213,12 +235,12 @@ template class FakeChannelWiseQuantizeDequantizeAbsMaxKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); - auto* out_scale = context.Output("OutScale"); - T* out_scale_data = out_scale->mutable_data(context.GetPlace()); - auto& dev_ctx = context.template device_context(); + void Compute(const framework::ExecutionContext &context) const override { + auto *in = context.Input("X"); + auto *out = context.Output("Out"); + auto *out_scale = context.Output("OutScale"); + T *out_scale_data = out_scale->mutable_data(context.GetPlace()); + auto &dev_ctx = context.template device_context(); out->mutable_data(dev_ctx.GetPlace()); int bit_length = context.Attr("bit_length"); @@ -226,8 +248,8 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxKernel int bin_cnt = std::pow(2, bit_length - 1) - 1; int quant_axis = context.Attr("quant_axis"); - FindChannelAbsMaxFunctor()(dev_ctx, *in, quant_axis, - out_scale_data); + FindChannelAbsMaxFunctor()( + dev_ctx, *in, quant_axis, out_scale_data); ChannelClipFakeQuantDequantFunctor()( dev_ctx, *in, *out_scale, bin_cnt, round_type, quant_axis, out); @@ -237,60 +259,64 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxKernel template class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* in_scale = context.Input("InScale"); + void Compute(const framework::ExecutionContext &context) const override { + auto *in = context.Input("X"); + auto *in_scale = context.Input("InScale"); - auto* out = context.Output("Out"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); bool is_test = context.Attr("is_test"); int bit_length = context.Attr("bit_length"); int round_type = context.Attr("round_type"); int bin_cnt = std::pow(2, bit_length - 1) - 1; - auto& dev_ctx = context.template device_context(); + auto &dev_ctx = context.template device_context(); // testing if (is_test) { - ClipAndFakeQuantFunctor()(dev_ctx, *in, *in_scale, - bin_cnt, round_type, out); + ClipAndFakeQuantFunctor()( + dev_ctx, *in, *in_scale, bin_cnt, round_type, out); return; } // training - auto* out_scale = context.Output("OutScale"); - auto* out_scales = context.Output("OutScales"); - auto* iter = context.Input("Iter"); + auto *out_scale = context.Output("OutScale"); + auto *out_scales = context.Output("OutScales"); + auto *iter = context.Input("Iter"); int window_size = context.Attr("window_size"); out_scale->mutable_data(context.GetPlace()); framework::Tensor cur_scale; - T* cur_scale_data = cur_scale.mutable_data({1}, context.GetPlace()); - FindAbsMaxFunctor()(dev_ctx, in->data(), in->numel(), - cur_scale_data); - FindRangeAbsMaxFunctor()(dev_ctx, cur_scale, *in_scale, - *iter, window_size, out_scales, + T *cur_scale_data = cur_scale.mutable_data({1}, context.GetPlace()); + FindAbsMaxFunctor()( + dev_ctx, in->data(), in->numel(), cur_scale_data); + FindRangeAbsMaxFunctor()(dev_ctx, + cur_scale, + *in_scale, + *iter, + window_size, + 
out_scales, out_scale); - ClipAndFakeQuantFunctor()(dev_ctx, *in, *out_scale, - bin_cnt, round_type, out); + ClipAndFakeQuantFunctor()( + dev_ctx, *in, *out_scale, bin_cnt, round_type, out); } }; template class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* in_scale = context.Input("InScale"); - auto* out = context.Output("Out"); + void Compute(const framework::ExecutionContext &context) const override { + auto *in = context.Input("X"); + auto *in_scale = context.Input("InScale"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); bool is_test = context.Attr("is_test"); int bit_length = context.Attr("bit_length"); int round_type = context.Attr("round_type"); int bin_cnt = std::pow(2, bit_length - 1) - 1; - auto& dev_ctx = context.template device_context(); + auto &dev_ctx = context.template device_context(); // testing if (is_test) { @@ -299,25 +325,30 @@ class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel { } // training - auto* in_accum = context.Input("InAccum"); - auto* in_state = context.Input("InState"); + auto *in_accum = context.Input("InAccum"); + auto *in_state = context.Input("InState"); auto cur_scale = memory::Alloc(dev_ctx, sizeof(T)); - T* cur_scale_data = static_cast(cur_scale->ptr()); + T *cur_scale_data = static_cast(cur_scale->ptr()); - FindAbsMaxFunctor()(dev_ctx, in->data(), in->numel(), - cur_scale_data); + FindAbsMaxFunctor()( + dev_ctx, in->data(), in->numel(), cur_scale_data); - auto* out_state = context.Output("OutState"); - auto* out_accum = context.Output("OutAccum"); - auto* out_scale = context.Output("OutScale"); + auto *out_state = context.Output("OutState"); + auto *out_accum = context.Output("OutAccum"); + auto *out_scale = context.Output("OutScale"); out_state->mutable_data(context.GetPlace()); out_accum->mutable_data(context.GetPlace()); out_scale->mutable_data(context.GetPlace()); float moving_rate = context.Attr("moving_rate"); - FindMovingAverageAbsMaxFunctor()( - dev_ctx, *in_accum, *in_state, cur_scale_data, moving_rate, out_state, - out_accum, out_scale); + FindMovingAverageAbsMaxFunctor()(dev_ctx, + *in_accum, + *in_state, + cur_scale_data, + moving_rate, + out_state, + out_accum, + out_scale); RunClipFunctor(dev_ctx, *in, *out_scale, bin_cnt, round_type, out); } @@ -325,21 +356,26 @@ class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel { virtual ~FakeMovingAverageAbsMaxKernelBase() = default; protected: - virtual void RunClipFunctor(const DeviceContext& dev_ctx, - const framework::Tensor& in, - const framework::Tensor& in_scale, int bin_cnt, - int round_type, framework::Tensor* out) const = 0; + virtual void RunClipFunctor(const DeviceContext &dev_ctx, + const framework::Tensor &in, + const framework::Tensor &in_scale, + int bin_cnt, + int round_type, + framework::Tensor *out) const = 0; }; template class FakeQuantizeMovingAverageAbsMaxKernel : public FakeMovingAverageAbsMaxKernelBase { protected: - void RunClipFunctor(const DeviceContext& dev_ctx, const framework::Tensor& in, - const framework::Tensor& in_scale, int bin_cnt, - int round_type, framework::Tensor* out) const override { - ClipAndFakeQuantFunctor()(dev_ctx, in, in_scale, bin_cnt, - round_type, out); + void RunClipFunctor(const DeviceContext &dev_ctx, + const framework::Tensor &in, + const framework::Tensor &in_scale, + int bin_cnt, + int round_type, + framework::Tensor *out) const 
override { + ClipAndFakeQuantFunctor()( + dev_ctx, in, in_scale, bin_cnt, round_type, out); } }; @@ -347,9 +383,12 @@ template class FakeQuantizeDequantizeMovingAverageAbsMaxKernel : public FakeMovingAverageAbsMaxKernelBase { protected: - void RunClipFunctor(const DeviceContext& dev_ctx, const framework::Tensor& in, - const framework::Tensor& in_scale, int bin_cnt, - int round_type, framework::Tensor* out) const override { + void RunClipFunctor(const DeviceContext &dev_ctx, + const framework::Tensor &in, + const framework::Tensor &in_scale, + int bin_cnt, + int round_type, + framework::Tensor *out) const override { ClipAndFakeQuantDequantFunctor()( dev_ctx, in, in_scale, bin_cnt, round_type, out); } @@ -358,12 +397,12 @@ class FakeQuantizeDequantizeMovingAverageAbsMaxKernel template class MovingAverageAbsMaxScaleKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto& dev_ctx = context.template device_context(); + void Compute(const framework::ExecutionContext &context) const override { + auto *in = context.Input("X"); + auto &dev_ctx = context.template device_context(); if (context.HasOutput("Out")) { - auto* out = context.Output("Out"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out); } @@ -375,40 +414,46 @@ class MovingAverageAbsMaxScaleKernel : public framework::OpKernel { } // training - auto* in_accum = context.Input("InAccum"); - auto* in_state = context.Input("InState"); + auto *in_accum = context.Input("InAccum"); + auto *in_state = context.Input("InState"); auto cur_scale = memory::Alloc(dev_ctx, sizeof(T)); - T* cur_scale_data = static_cast(cur_scale->ptr()); + T *cur_scale_data = static_cast(cur_scale->ptr()); - FindAbsMaxFunctor()(dev_ctx, in->data(), in->numel(), - cur_scale_data); + FindAbsMaxFunctor()( + dev_ctx, in->data(), in->numel(), cur_scale_data); - auto* out_state = context.Output("OutState"); - auto* out_accum = context.Output("OutAccum"); - auto* out_scale = context.Output("OutScale"); + auto *out_state = context.Output("OutState"); + auto *out_accum = context.Output("OutAccum"); + auto *out_scale = context.Output("OutScale"); out_state->mutable_data(context.GetPlace()); out_accum->mutable_data(context.GetPlace()); out_scale->mutable_data(context.GetPlace()); float moving_rate = context.Attr("moving_rate"); - FindMovingAverageAbsMaxFunctor()( - dev_ctx, *in_accum, *in_state, cur_scale_data, moving_rate, out_state, - out_accum, out_scale); + FindMovingAverageAbsMaxFunctor()(dev_ctx, + *in_accum, + *in_state, + cur_scale_data, + moving_rate, + out_state, + out_accum, + out_scale); } }; template class StrightThroughEstimatorGradKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { - auto* d_out = + void Compute(const framework::ExecutionContext &context) const override { + auto *d_out = context.Input(framework::GradVarName("Out")); auto x_grad_name = framework::GradVarName("X"); - auto* d_x = context.Output(x_grad_name); - PADDLE_ENFORCE_NOT_NULL(d_x, platform::errors::PreconditionNotMet( - "StrightThroughEstimatorGradKernel " - "doesn't have the output named %s.", - x_grad_name)); + auto *d_x = context.Output(x_grad_name); + PADDLE_ENFORCE_NOT_NULL(d_x, + platform::errors::PreconditionNotMet( + "StrightThroughEstimatorGradKernel " + "doesn't have the output named %s.", + x_grad_name)); // Initialize dx as 
same as d_out d_x->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/quantize_linear_op.cc b/paddle/fluid/operators/quantize_linear_op.cc index 7aaebb8f929..4580acbe3fc 100644 --- a/paddle/fluid/operators/quantize_linear_op.cc +++ b/paddle/fluid/operators/quantize_linear_op.cc @@ -26,14 +26,17 @@ namespace operators { template struct ChannelDequantizeFunctorV2 { - void operator()(const platform::CPUDeviceContext& dev_ctx, - const framework::Tensor* in, const framework::Tensor* scale, - T max_range, const int quant_axis, framework::Tensor* out) { + void operator()(const platform::CPUDeviceContext &dev_ctx, + const framework::Tensor *in, + const framework::Tensor *scale, + T max_range, + const int quant_axis, + framework::Tensor *out) { // Dequant op is before quantized op // Dequantize the weight of quantized op auto in_dims = in->dims(); const int64_t channel = in_dims[quant_axis]; - const T* scale_factor = scale->data(); + const T *scale_factor = scale->data(); if (quant_axis == 0) { for (int64_t i = 0; i < channel; i++) { T s = scale_factor[i]; @@ -41,7 +44,7 @@ struct ChannelDequantizeFunctorV2 { framework::Tensor one_channel_out = out->Slice(i, i + 1); auto in_e = framework::EigenVector::Flatten(one_channel_in); auto out_e = framework::EigenVector::Flatten(one_channel_out); - auto& dev = *dev_ctx.eigen_device(); + auto &dev = *dev_ctx.eigen_device(); out_e.device(dev) = in_e * s / max_range; } } else if (quant_axis == 1) { @@ -51,12 +54,12 @@ struct ChannelDequantizeFunctorV2 { } int64_t step_i = in->numel() / out_iter; int64_t step_j = in->numel() / (out_iter * channel); - auto* in_data = in->data(); - auto* out_data = out->mutable_data(dev_ctx.GetPlace()); + auto *in_data = in->data(); + auto *out_data = out->mutable_data(dev_ctx.GetPlace()); for (int64_t i = 0; i < out_iter; i++) { for (int64_t j = 0; j < channel; j++) { - auto* cur_in = in_data + i * step_i + j * step_j; - auto* cur_out = out_data + i * step_i + j * step_j; + auto *cur_in = in_data + i * step_i + j * step_j; + auto *cur_out = out_data + i * step_i + j * step_j; T s = scale_factor[j]; for (int64_t k = 0; k < step_j; k++) { *cur_out = (*cur_in) * s / max_range; @@ -75,11 +78,11 @@ template struct ChannelDequantizeFunctorV2; class QuantizeLinearOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { + void InferShape(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "QuantizeLinear"); OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "QuantizeLinear"); - OP_INOUT_CHECK(ctx->HasInput("ZeroPoint"), "Input", "ZeroPoint", - "QuantizeLinear"); + OP_INOUT_CHECK( + ctx->HasInput("ZeroPoint"), "Input", "ZeroPoint", "QuantizeLinear"); OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "QuantizeLinear"); ctx->SetOutputDim("Y", ctx->GetInputDim("X")); int quant_axis = ctx->Attrs().Get("quant_axis"); @@ -95,7 +98,7 @@ class QuantizeLinearOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); } @@ -116,9 +119,10 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker { "For conv2d, depthwise_conv2d, conv2d_transpose " "and mul, the quant_axis is equal to 
the cout axis.") .SetDefault(0) - .AddCustomChecker([](const int& quant_axis) { + .AddCustomChecker([](const int &quant_axis) { PADDLE_ENFORCE_EQ( - quant_axis == 0 || quant_axis == 1 || quant_axis == -1, true, + quant_axis == 0 || quant_axis == 1 || quant_axis == -1, + true, platform::errors::InvalidArgument( "'quant_axis' should be 0 or 1, but " "the received is %d", @@ -126,8 +130,9 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker { }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) - .AddCustomChecker([](const int& bit_length) { - PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + .AddCustomChecker([](const int &bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, + true, platform::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", @@ -140,13 +145,17 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker { "1: rounding to nearest ties away from zero. Eg: round(1.5)=2, " "round(2.5)=3") .SetDefault(0) - .AddCustomChecker([](const int& round_type) { - PADDLE_ENFORCE_EQ(round_type >= 0 && round_type <= 1, true, - platform::errors::InvalidArgument( - "'round_type' should be between 0 and 1, but " - "the received is %d", - round_type)); - }); + .AddCustomChecker([](const int &round_type) { + PADDLE_ENFORCE_EQ( + round_type == 0 || round_type == 1, + true, + platform::errors::InvalidArgument( + "'round_type' should be 0 or 1, 0 rounding to " + "nearest ties to even and 1 is rounding to nearest " + "ties away from zero.but the received is %d", + round_type)); + }) + .AsExtra(); AddAttr("is_test", "(bool, default false) Set to true for inference only, false " "for training. Some layers may run faster when this is true.") @@ -170,14 +179,18 @@ namespace ops = paddle::operators; using CPU = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR( - quantize_linear, ops::QuantizeLinearOp, ops::QuantizeLinearOpMaker, + quantize_linear, + ops::QuantizeLinearOp, + ops::QuantizeLinearOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL(quantize_linear, ops::QuantizeLinearKernel); REGISTER_OPERATOR( - dequantize_linear, ops::QuantizeLinearOp, ops::QuantizeLinearOpMaker, + dequantize_linear, + ops::QuantizeLinearOp, + ops::QuantizeLinearOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index 9bcf3af1340..3926ee95036 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -121,8 +121,7 @@ class PostTrainingQuantization(object): algo="KL", hist_percent=0.99999, quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], - weight_round_algo='round', - round_type='TiesToEven', + round_type='round', learning_rate=0.001, is_full_quantize=False, bias_correction=False, @@ -181,14 +180,10 @@ class PostTrainingQuantization(object): quantizable_op_type(list[str], optional): List the type of ops that will be quantized. Default is ["conv2d", "depthwise_conv2d", "mul"]. - weight_round_algo(str, optional): The method of converting the quantized weights + round_type(str, optional): The method of converting the quantized weights value float->int. Currently supports ['round', 'adaround'] methods. 
Default is `round`, which is rounding nearest to the integer. 'adaround' is refer to https://arxiv.org/abs/2004.10568. - round_type(str, optional): The method of converting the tensor value float->int. - Currently supports ['TiesToEven', 'TiesAwayFromZero'] methods. - Default is `TiesToEven`, which is rounding to nearest ties to even. - 'TiesAwayFromZero' is rounding to nearest ties away from zero. learning_rate(float, optional): The learning rate of adaround method. is_full_quantized(bool, optional): If set is_full_quantized as True, apply quantization to all supported quantizable op type. If set @@ -269,10 +264,8 @@ class PostTrainingQuantization(object): self._support_algo_type = [ 'KL', 'hist', 'avg', 'mse', 'emd', 'abs_max', 'min_max' ] - assert round_type in ['TiesToEven', 'TiesAwayFromZero'] + assert round_type in ['adaround', 'round'] self._round_type = round_type - assert weight_round_algo in ['adaround', 'round'] - self._weight_round_algo = weight_round_algo self._learning_rate = learning_rate self._dynamic_quantize_op_type = ['lstm'] self._support_quantize_op_type = \ @@ -414,7 +407,7 @@ class PostTrainingQuantization(object): if self._algo in ["KL", "hist"]: self._calculate_kl_hist_threshold() - if self._weight_round_algo == 'adaround': + if self._round_type == 'adaround': self._adaround_apply() self._reset_activation_persistable() @@ -651,7 +644,6 @@ class PostTrainingQuantization(object): float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value _logger.info("MSE searching stage ...") - distribution = np.round if self._round_type == 'TiesToEven' else utils.round_c for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) var_tensor = var_tensor.flatten() @@ -664,9 +656,14 @@ class PostTrainingQuantization(object): scale = s * abs_max_value s += 0.02 bins = 2**(self._activation_bits - 1) - 1 - quant_var = np.clip(distribution(var_tensor / scale * bins), - -bins - 1, bins) - quant_dequant_var = quant_var / bins * scale + if self._onnx_format: + quant_var = np.clip(distribution(var_tensor / scale * bins), + -bins - 1, bins) + quant_dequant_var = quant_var / bins * scale + else: + quant_dequant_var = np.round( + np.clip(var_tensor, 0.0, scale) / scale * + bins) / bins * scale mse_loss = ((var_tensor - quant_dequant_var)**2).mean() if mse_loss <= self._best_calibration_loss[var_name]: self._best_calibration_loss[var_name] = mse_loss @@ -691,7 +688,6 @@ class PostTrainingQuantization(object): float(np.max(np.abs(var_tensor[i])))) self._quantized_threshold[var_name] = abs_max_value _logger.info("EMD searching stage ...") - distribution = np.round if self._round_type == 'TiesToEven' else utils.round_c for var_name in self._quantized_act_var_name: var_tensor = utils.load_variable_data(self._scope, var_name) var_tensor = var_tensor.flatten() @@ -704,9 +700,14 @@ class PostTrainingQuantization(object): scale = s * abs_max_value s += 0.02 bins = 2**(self._activation_bits - 1) - 1 - quant_var = np.clip(distribution(var_tensor / scale * bins), - -bins - 1, bins) - quant_dequant_var = quant_var / bins * scale + if self._onnx_format: + quant_var = np.clip(distribution(var_tensor / scale * bins), + -bins - 1, bins) + quant_dequant_var = quant_var / bins * scale + else: + quant_dequant_var = np.round( + np.clip(var_tensor, 0.0, scale) / scale * + bins) / bins * scale emd_loss = np.abs( np.mean(var_tensor) - np.mean(quant_dequant_var)) + np.abs( np.std(var_tensor) - np.std(quant_dequant_var)) @@ -918,8 +919,7 @@ 
class PostTrainingQuantization(object): activation_bits=self._activation_bits, activation_quantize_type=self._activation_quantize_type, weight_quantize_type=self._weight_quantize_type, - quantizable_op_type=major_quantizable_op_types, - round_type=self._round_type) + quantizable_op_type=major_quantizable_op_types) else: transform_pass = QuantizationTransformPassV2( scope=self._scope, @@ -928,8 +928,7 @@ class PostTrainingQuantization(object): activation_bits=self._activation_bits, activation_quantize_type=self._activation_quantize_type, weight_quantize_type=self._weight_quantize_type, - quantizable_op_type=major_quantizable_op_types, - round_type=self._round_type) + quantizable_op_type=major_quantizable_op_types) for sub_graph in graph.all_sub_graphs(): # Insert fake_quant/fake_dequantize op must in test graph, so @@ -946,15 +945,13 @@ class PostTrainingQuantization(object): add_quant_dequant_pass = AddQuantDequantPass( scope=self._scope, place=self._place, - quantizable_op_type=minor_quantizable_op_types, - round_type=self._round_type) + quantizable_op_type=minor_quantizable_op_types) else: add_quant_dequant_pass = AddQuantDequantPassV2( scope=self._scope, place=self._place, quantizable_op_type=minor_quantizable_op_types, - is_full_quantized=self._is_full_quantize, - round_type=self._round_type) + is_full_quantized=self._is_full_quantize) for sub_graph in graph.all_sub_graphs(): sub_graph._for_test = True @@ -979,7 +976,6 @@ class PostTrainingQuantization(object): place=self._place, bias_correction=self._bias_correction, weight_bits=self._weight_bits, - weight_round_algo=self._weight_round_algo, round_type=self._round_type, activation_bits=self._activation_bits, weight_quantize_type=self._weight_quantize_type, diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 08d507284e4..3a316e9192e 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -119,7 +119,6 @@ class QuantizationTransformPass(object): moving_rate=0.9, skip_pattern=['skip_quant'], quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'], - round_type='TiesToEven', weight_quantize_func=None, act_quantize_func=None, weight_preprocess_func=None, @@ -157,10 +156,6 @@ class QuantizationTransformPass(object): quantizable_op_type(list[str]): List the type of ops that will be quantized. Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in QuantizationFreezePass and ConvertToInt8Pass must be the same as this. - round_type(str, optional): The method of converting the tensor value float->int. - Currently supports ['TiesToEven', 'TiesAwayFromZero'] methods. - Default is `TiesToEven`, which is rounding to nearest ties to even. - 'TiesAwayFromZero' is rounding to nearest ties away from zero. weight_quantize_func(function): Function that defines how to quantize weight. Using this can quickly test if user's quantization method works or not. 
In this function, user should both define quantization function and @@ -211,7 +206,6 @@ class QuantizationTransformPass(object): self._weight_bits = weight_bits self._activation_bits = activation_bits self._skip_pattern = skip_pattern - self._round_type = round_type self._weight_quantize_func = weight_quantize_func self._act_quantize_func = act_quantize_func self._weight_preprocess_func = weight_preprocess_func @@ -465,12 +459,10 @@ class QuantizationTransformPass(object): _init_var_node(scale_var_node, np.zeros(scale_var_node.shape(), dtype=data_type), self._scope, self._place) - round_type = 0 if self._round_type == 'TiesToEven' else 1 quant_op_node = graph.create_op_node( op_type='fake_quantize_abs_max', attrs={ 'bit_length': quant_bits, - 'round_type': round_type, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, inputs={'X': var_node}, @@ -525,11 +517,9 @@ class QuantizationTransformPass(object): inputs['Iter'] = self._global_step outputs['OutScales'] = scales_node - round_type = 0 if self._round_type == 'TiesToEven' else 1 attrs = { 'window_size': self._window_size, 'bit_length': quant_bits, - 'round_type': round_type, 'is_test': self._is_test, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward } @@ -600,10 +590,8 @@ class QuantizationTransformPass(object): outs['OutState'] = state_out_node outs['OutAccum'] = accum_out_node - round_type = 0 if self._round_type == 'TiesToEven' else 1 attrs = { 'bit_length': quant_bits, - 'round_type': round_type, 'moving_rate': self._moving_rate, 'is_test': self._is_test, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward @@ -650,12 +638,10 @@ class QuantizationTransformPass(object): _init_var_node(scale_var_node, np.zeros(scale_var_node.shape(), dtype=data_type), self._scope, self._place) - round_type = 0 if self._round_type == 'TiesToEven' else 1 quant_op_node = graph.create_op_node( op_type='fake_channel_wise_quantize_abs_max', attrs={ 'bit_length': quant_bits, - 'round_type': round_type, 'quant_axis': quant_axis, 'is_test': self._is_test, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward @@ -949,8 +935,7 @@ class QuantizationFreezePass(object): bias_correction=False, weight_bits=8, activation_bits=8, - weight_round_algo='round', - round_type='TiesToEven', + round_type='round', weight_quantize_type='abs_max', quantizable_op_type=None): """ @@ -968,14 +953,10 @@ class QuantizationFreezePass(object): https://arxiv.org/abs/1810.05723. weight_bits(int): quantization bit number for weights. activation_bits(int): quantization bit number for activation. - weight_round_algo(str, optional): The method of converting the quantized weights + round_type(str, optional): The method of converting the quantized weights value float->int. Currently supports ['round', 'adaround'] methods. Default is `round`, which is rounding nearest to the integer. 'adaround' is refer to https://arxiv.org/abs/2004.10568. - round_type(str, optional): The method of converting the tensor value float->int. - Currently supports ['TiesToEven', 'TiesAwayFromZero'] methods. - Default is `TiesToEven`, which is rounding to nearest ties to even. - 'TiesAwayFromZero' is rounding to nearest ties away from zero. weight_quantize_type(str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'. The 'range_abs_max' usually is not used for weight, since weights are fixed once the model is well trained. 
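For reference, the weight path described above for round_type='round' amounts to the clip-then-rescale branch of utils.quant_tensor followed by the explicit np.round that the freeze pass now applies. Below is a minimal NumPy sketch of that per-tensor flow; the helper name freeze_weight_round and the sample values are illustrative only, not from the patch:

    import numpy as np

    def freeze_weight_round(weight, scale, weight_bits=8):
        # Mirrors the per-tensor branch of the updated utils.quant_tensor
        # (clip to [-scale, scale], rescale by bnt/scale) plus the explicit
        # np.round the freeze pass applies afterwards.
        bnt = (1 << (weight_bits - 1)) - 1       # 127 for 8-bit weights
        scale = 1e-8 if scale == 0.0 else scale  # guard against a zero scale
        clipped = np.clip(weight, -scale, scale)
        return np.round(clipped / scale * bnt)   # integer-valued weights

    w = np.array([-1.2, -0.5, 0.26, 0.9])
    print(freeze_weight_round(w, scale=np.abs(w).max()))  # -> [-127. -53. 28. 95.]

Rounding is deliberately left out of quant_tensor's default branch, so callers such as this pass (and the bias-correction step) decide when the final np.round happens.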
@@ -991,7 +972,6 @@ class QuantizationFreezePass(object): self._place = _get_paddle_place(place) self._weight_bits = weight_bits self._activation_bits = activation_bits - self._weight_round_algo = weight_round_algo self._round_type = round_type self._weight_quantize_type = weight_quantize_type self._fake_quant_op_names = _fake_quant_op_list @@ -1039,7 +1019,7 @@ class QuantizationFreezePass(object): scale_v = scale_v.tolist() self._quant_var_scale_map[input_arg_name] = scale_v # Quantize weight and restore - if self._weight_round_algo == 'round': + if self._round_type == 'round': param_v = self._load_var(input_arg_name) if any( _check_grandchild_op_node(op_node, op) @@ -1049,7 +1029,8 @@ class QuantizationFreezePass(object): quant_axis = 0 quantized_param_v = utils.quant_tensor( param_v.copy(), scale_v, quant_axis, - self._weight_bits, self._round_type) + self._weight_bits) + quantized_param_v = np.round(quantized_param_v) # Weight bias correction if self._bias_correction == True: quantized_param_v = utils.bias_correction_w( @@ -1058,6 +1039,7 @@ class QuantizationFreezePass(object): scale_v, quant_axis, weight_bits=self._weight_bits) + quantized_param_v = np.round(quantized_param_v) self._restore_var(input_arg_name, quantized_param_v) self._remove_fake_quant_and_dequant_op(graph, op_node) @@ -1600,8 +1582,7 @@ class AddQuantDequantPass(object): quant_bits=8, skip_pattern=["skip_quant"], quantizable_op_type=["elementwise_add", "pool2d"], - is_full_quantized=False, - round_type='TiesToEven'): + is_full_quantized=False): """ Constructor. @@ -1623,10 +1604,6 @@ class AddQuantDequantPass(object): quantization to all supported quantizable op type. If set is_full_quantized as False, only apply quantization to the op type according to the input quantizable_op_type. - round_type(str, optional): The method of converting the tensor value float->int. - Currently supports ['TiesToEven', 'TiesAwayFromZero'] methods. - Default is `TiesToEven`, which is rounding to nearest ties to even. - 'TiesAwayFromZero' is rounding to nearest ties away from zero. """ self._scope = scope self._place = _get_paddle_place(place) @@ -1634,7 +1611,6 @@ class AddQuantDequantPass(object): self._quant_bits = quant_bits self._is_test = None self._skip_pattern = skip_pattern - self._round_type = round_type if is_full_quantized: self._quantizable_op_type = utils._act_supported_quantizable_op_type @@ -1769,10 +1745,8 @@ class AddQuantDequantPass(object): outs['OutState'] = state_out_node outs['OutAccum'] = accum_out_node - round_type = 0 if self._round_type == 'TiesToEven' else 1 attrs = { 'bit_length': quant_bits, - 'round_type': round_type, 'moving_rate': self._moving_rate, 'is_test': self._is_test, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward @@ -1812,10 +1786,6 @@ class InsertQuantizeLinear(object): Default is -1. channel_wise(bool, optional): Whether quantization with per channel or not. Default is False. is_test(bool, optional): Whether quantization with training or not. Default is True. - round_type(str, optional): The method of converting the tensor value float->int. - Currently supports ['TiesToEven', 'TiesAwayFromZero'] methods. - Default is `TiesToEven`, which is rounding to nearest ties to even. - 'TiesAwayFromZero' is rounding to nearest ties away from zero. 
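The removed lines above are the last place the TiesToEven / TiesAwayFromZero naming appears in these passes; in the C++ kernels earlier in the patch the same distinction is carried by the integer round_type attribute (0 rounds ties to even and clips in the integer domain, 1 clips to [-s, s] first and rounds ties away from zero). A minimal NumPy sketch of the two behaviours follows; the helper names are illustrative only:

    import numpy as np

    def round_ties_to_even(x):
        return np.round(x)  # numpy's default rounding breaks ties to even

    def round_ties_away_from_zero(x):
        return np.floor(np.abs(x) + 0.5) * np.sign(x)

    halves = np.array([-2.5, -1.5, 0.5, 1.5, 2.5])
    print(round_ties_to_even(halves))         # [-2. -2.  0.  2.  2.]
    print(round_ties_away_from_zero(halves))  # [-3. -2.  1.  2.  3.]

    def fake_quant_dequant(x, s, bit_length=8, round_type=0):
        # Sketch of the two kernel paths in this patch: round_type=0 scales,
        # rounds ties to even and clips to [-bnt-1, bnt]; round_type=1 clips
        # to [-s, s] first, then scales and rounds ties away from zero.
        bnt = 2 ** (bit_length - 1) - 1
        if round_type == 0:
            q = np.clip(round_ties_to_even(x / s * bnt), -bnt - 1, bnt)
        else:
            q = round_ties_away_from_zero(np.clip(x, -s, s) / s * bnt)
        return q * s / bnt

Note that np.round, like roundWithTiesToEven in the patch, maps 2.5 to 2 rather than 3; that is the observable difference between the two round_type settings on values that fall exactly halfway between quantization levels.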
""" def __init__(self, @@ -1824,15 +1794,13 @@ class InsertQuantizeLinear(object): quant_bits=8, quant_axis=-1, channel_wise=False, - is_test=True, - round_type='TiesToEven'): + is_test=True): self._place = place self._scope = scope self.quant_bits = quant_bits self.quant_axis = quant_axis self.channel_wise = channel_wise self._is_test = is_test - self._round_type = round_type def insert_quant_op(self, graph, var_node): assert var_node.is_var(), '{} is not a var'.format(var_node.name()) @@ -1875,12 +1843,7 @@ class InsertQuantizeLinear(object): if zero_point_node is not None: inputs["ZeroPoint"] = zero_point_node - round_type = 0 if self._round_type == 'TiesToEven' else 1 - attrs = { - "quant_axis": self.quant_axis, - "bit_length": self.quant_bits, - "round_type": round_type - } + attrs = {"quant_axis": self.quant_axis, "bit_length": self.quant_bits} outputs = {"Y": quant_var_node} if not self._is_test: attrs["is_test"] = self._is_test @@ -1985,7 +1948,6 @@ class QuantizationTransformPassV2(object): moving_rate=0.9, skip_pattern=['skip_quant'], quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'], - round_type='TiesToEven', weight_quantize_func=None, act_quantize_func=None, weight_preprocess_func=None, @@ -2021,10 +1983,6 @@ class QuantizationTransformPassV2(object): quantizable_op_type(list[str]): List the type of ops that will be quantized. Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in QuantizationFreezePass and ConvertToInt8Pass must be the same as this. - round_type(str, optional): The method of converting the tensor value float->int. - Currently supports ['TiesToEven', 'TiesAwayFromZero'] methods. - Default is `TiesToEven`, which is rounding to nearest ties to even. - 'TiesAwayFromZero' is rounding to nearest ties away from zero. weight_quantize_func(function): Function that defines how to quantize weight. Using this can quickly test if user's quantization method works or not. In this function, user should both define quantization function and @@ -2074,7 +2032,6 @@ class QuantizationTransformPassV2(object): self._weight_bits = weight_bits self._activation_bits = activation_bits self._skip_pattern = skip_pattern - self._round_type = round_type self._weight_quantize_func = weight_quantize_func self._act_quantize_func = act_quantize_func self._weight_preprocess_func = weight_preprocess_func @@ -2198,8 +2155,7 @@ class QuantizationTransformPassV2(object): quant_bits=quant_bits, quant_axis=quant_axis, channel_wise=channel_wise, - is_test=self._is_test, - round_type=self._round_type) + is_test=self._is_test) quant_var_node, scale_var_node = insert_quant_pass.insert_quant_op( graph, var_node) dequant_var_node = insert_quant_pass.insert_dequant_op( @@ -2307,8 +2263,7 @@ class AddQuantDequantPassV2(object): quant_bits=8, skip_pattern=["skip_quant"], quantizable_op_type=["elementwise_add", "pool2d"], - is_full_quantized=False, - round_type='TiesToEven'): + is_full_quantized=False): """ Args: scope(paddle.Scope): The scope is used to initialize these new parameters. @@ -2328,10 +2283,6 @@ class AddQuantDequantPassV2(object): quantization to all supported quantizable op type. If set is_full_quantized as False, only apply quantization to the op type according to the input quantizable_op_type. - round_type(str, optional): The method of converting the tensor value float->int. - Currently supports ['TiesToEven', 'TiesAwayFromZero'] methods. - Default is `TiesToEven`, which is rounding to nearest ties to even. 
- 'TiesAwayFromZero' is rounding to nearest ties away from zero. Examples: .. code-block:: python @@ -2354,7 +2305,6 @@ class AddQuantDequantPassV2(object): self._quant_bits = quant_bits self._is_test = None self._skip_pattern = skip_pattern - self._round_type = round_type if is_full_quantized: self._quantizable_op_type = utils._act_supported_quantizable_op_type @@ -2427,8 +2377,7 @@ class AddQuantDequantPassV2(object): quant_bits=self._quant_bits, quant_axis=-1, channel_wise=False, - is_test=self._is_test, - round_type=self._round_type) + is_test=self._is_test) quant_var_node, scale_var_node = insert_quant_pass.insert_quant_op( graph, in_node) dequant_var_node = insert_quant_pass.insert_dequant_op( @@ -2511,8 +2460,6 @@ class ReplaceFakeQuantDequantPass(object): "quant_axis") else -1 bit_length = op.op().attr("bit_length") if op.op().has_attr( "bit_length") else 8 - round_type = op.op().attr("round_type") if op.op().has_attr( - "round_type") else 0 zero_point_node = None quanted_node = x_node @@ -2534,8 +2481,7 @@ class ReplaceFakeQuantDequantPass(object): quant_op_node = graph.create_op_node(op_type="quantize_linear", attrs={ "quant_axis": quant_axis, - "bit_length": bit_length, - "round_type": round_type + "bit_length": bit_length }, inputs={ "X": x_node, @@ -2654,11 +2600,11 @@ class QuantWeightPass(object): param_v = self._load_var(x_node.name()) quant_axis = _op.op().attr("quant_axis") bits_length = _op.op().attr("bit_length") - round_type = _op.op().attr("round_type") if _op.op().has_attr( - "round_type") else 0 - quantized_param_v = utils.quant_tensor(param_v.copy(), scale_v, - quant_axis, bits_length, - round_type) + quantized_param_v = utils.quant_tensor(param_v.copy(), + scale_v, + quant_axis, + bits_length, + onnx_format=True) if self._bias_correction == True: quantized_param_v = utils.bias_correction_w( param_v, diff --git a/python/paddle/fluid/contrib/slim/quantization/utils.py b/python/paddle/fluid/contrib/slim/quantization/utils.py index e396ce9dee2..28efcd2d591 100644 --- a/python/paddle/fluid/contrib/slim/quantization/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/utils.py @@ -321,39 +321,41 @@ def set_variable_data(scope, place, var_name, np_value): tensor.set(np_value, place) -def round_c_single_element(val): - dtype = type(val) - if val >= 0: - return dtype(np.floor(val + 0.5)) - return dtype(np.ceil(val - 0.5)) +def quant_tensor(x, scale, quant_axis=0, weight_bits=8, onnx_format=False): + # symmetry quant + def _clip(x, scale): + x[x > scale] = scale + x[x < -scale] = -scale + return x - -# rounding to nearest ties away from zero -round_c = np.vectorize(round_c_single_element) - - -def quant_tensor(x, - scale, - quant_axis=0, - weight_bits=8, - round_type='TiesToEven'): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.' 
- distribution = np.round if round_type == 'TiesToEven' else round_c bnt = (1 << (weight_bits - 1)) - 1 if isinstance(scale, list): for i, s in enumerate(scale): if s == 0.0: s = 1e-8 if quant_axis == 0: - x[i] = distribution(x[i] / s * bnt) - x[i] = np.clip(x[i], -bnt - 1, bnt) + if onnx_format: + x[i] = np.round(x[i] / s * bnt) + x[i] = np.clip(x[i], -bnt - 1, bnt) + else: + x[i] = _clip(x[i], s) + x[i] = x[i] / s * bnt else: - x[:, i] = distribution(x[:, i] / s * bnt) - x[:, i] = np.clip(x[:, i], -bnt - 1, bnt) + if onnx_format: + x[:, i] = np.round(x[:, i] / s * bnt) + x[:, i] = np.clip(x[:, i], -bnt - 1, bnt) + else: + x[:, i] = _clip(x[:, i], s) + x[:, i] = x[:, i] / s * bnt else: scale = 1e-8 if scale == 0.0 else scale - x = distribution(x / scale * bnt) - x = np.clip(x, -bnt - 1, bnt) + if onnx_format: + x = np.round(x / scale * bnt) + x = np.clip(x, -bnt - 1, bnt) + else: + x = _clip(x, scale) + x = x / scale * bnt return x diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py index befc76c0275..6100ed4f82a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py @@ -165,7 +165,7 @@ class TestPostTrainingQuantization(unittest.TestCase): model_path, data_path, algo="KL", - weight_round_algo="round", + round_type="round", quantizable_op_type=["conv2d"], is_full_quantize=False, is_use_cache_file=False, @@ -185,7 +185,7 @@ class TestPostTrainingQuantization(unittest.TestCase): batch_nums=batch_nums, algo=algo, quantizable_op_type=quantizable_op_type, - weight_round_algo=weight_round_algo, + round_type=round_type, is_full_quantize=is_full_quantize, optimize_model=is_optimize_model, onnx_format=onnx_format, @@ -201,7 +201,7 @@ class TestPostTrainingQuantization(unittest.TestCase): data_url, data_md5, algo, - weight_round_algo, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, @@ -224,7 +224,7 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start post training quantization for {0} on {1} samples ...". 
format(model_name, quant_iterations)) self.generate_quantized_model(fp32_model_path, data_path, algo, - weight_round_algo, quantizable_op_type, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, quant_iterations, onnx_format) @@ -255,7 +255,7 @@ class TestPostTrainingAvgForLSTM(TestPostTrainingQuantization): data_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/quant_lstm_input_data.tar.gz" data_md5 = "add84c754e9b792fea1fbd728d134ab7" algo = "avg" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["mul", "lstm"] is_full_quantize = False is_use_cache_file = False @@ -264,7 +264,7 @@ class TestPostTrainingAvgForLSTM(TestPostTrainingQuantization): infer_iterations = 100 quant_iterations = 10 self.run_test(model_name, model_url, model_md5, data_name, data_url, - data_md5, algo, weight_round_algo, quantizable_op_type, + data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, infer_iterations, quant_iterations) @@ -279,7 +279,7 @@ class TestPostTrainingAvgForLSTMONNXFormat(TestPostTrainingQuantization): data_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/quant_lstm_input_data.tar.gz" data_md5 = "add84c754e9b792fea1fbd728d134ab7" algo = "avg" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["mul", "lstm"] is_full_quantize = False is_use_cache_file = False @@ -295,7 +295,7 @@ class TestPostTrainingAvgForLSTMONNXFormat(TestPostTrainingQuantization): data_url, data_md5, algo, - weight_round_algo, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py index 71e974f8981..807bdbf8a9a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py @@ -108,7 +108,7 @@ class TestPostTrainingQuantization(unittest.TestCase): def generate_quantized_model(self, model_path, algo="KL", - weight_round_algo="round", + round_type="round", quantizable_op_type=["conv2d"], is_full_quantize=False, is_use_cache_file=False, @@ -130,7 +130,7 @@ class TestPostTrainingQuantization(unittest.TestCase): batch_nums=batch_nums, algo=algo, quantizable_op_type=quantizable_op_type, - weight_round_algo=weight_round_algo, + round_type=round_type, is_full_quantize=is_full_quantize, optimize_model=is_optimize_model, bias_correction=bias_correction, @@ -145,7 +145,7 @@ class TestPostTrainingQuantization(unittest.TestCase): data_url, data_md5, algo, - weight_round_algo, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, @@ -169,11 +169,10 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start INT8 post training quantization for {0} on {1} images ...". 
format(model_name, quant_iterations * batch_size)) - self.generate_quantized_model(origin_model_path, algo, - weight_round_algo, quantizable_op_type, - is_full_quantize, is_use_cache_file, - is_optimize_model, batch_size, - quant_iterations, onnx_format, + self.generate_quantized_model(origin_model_path, algo, round_type, + quantizable_op_type, is_full_quantize, + is_use_cache_file, is_optimize_model, + batch_size, quant_iterations, onnx_format, skip_tensor_list, bias_correction) print("Start INT8 inference for {0} on {1} images ...".format( @@ -204,7 +203,7 @@ class TestPostTrainingKLForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "KL" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -213,7 +212,7 @@ class TestPostTrainingKLForMnist(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test(model_name, data_url, data_md5, algo, weight_round_algo, + self.run_test(model_name, data_url, data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, quant_iterations) @@ -226,7 +225,7 @@ class TestPostTraininghistForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "hist" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -235,7 +234,7 @@ class TestPostTraininghistForMnist(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test(model_name, data_url, data_md5, algo, weight_round_algo, + self.run_test(model_name, data_url, data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, quant_iterations) @@ -248,7 +247,7 @@ class TestPostTrainingmseForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "mse" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -257,7 +256,7 @@ class TestPostTrainingmseForMnist(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test(model_name, data_url, data_md5, algo, weight_round_algo, + self.run_test(model_name, data_url, data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, quant_iterations) @@ -270,7 +269,7 @@ class TestPostTrainingemdForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "emd" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -279,7 +278,7 @@ class TestPostTrainingemdForMnist(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test(model_name, data_url, 
data_md5, algo, weight_round_algo, + self.run_test(model_name, data_url, data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, quant_iterations) @@ -292,7 +291,7 @@ class TestPostTrainingavgForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "avg" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -301,7 +300,7 @@ class TestPostTrainingavgForMnist(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test(model_name, data_url, data_md5, algo, weight_round_algo, + self.run_test(model_name, data_url, data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, quant_iterations) @@ -314,7 +313,7 @@ class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "abs_max" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "mul"] is_full_quantize = True is_use_cache_file = False @@ -323,7 +322,7 @@ class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 10 - self.run_test(model_name, data_url, data_md5, algo, weight_round_algo, + self.run_test(model_name, data_url, data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, quant_iterations) @@ -336,7 +335,7 @@ class TestPostTrainingmseAdaroundForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "mse" - weight_round_algo = "adaround" + round_type = "adaround" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -350,7 +349,7 @@ class TestPostTrainingmseAdaroundForMnist(TestPostTrainingQuantization): data_url, data_md5, algo, - weight_round_algo, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, @@ -369,7 +368,7 @@ class TestPostTrainingKLAdaroundForMnist(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "KL" - weight_round_algo = "adaround" + round_type = "adaround" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -378,7 +377,7 @@ class TestPostTrainingKLAdaroundForMnist(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test(model_name, data_url, data_md5, algo, weight_round_algo, + self.run_test(model_name, data_url, data_md5, algo, round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, quant_iterations) @@ -391,7 +390,7 @@ class TestPostTrainingmseForMnistONNXFormat(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "mse" - weight_round_algo 
= "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -405,7 +404,7 @@ class TestPostTrainingmseForMnistONNXFormat(TestPostTrainingQuantization): data_url, data_md5, algo, - weight_round_algo, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, @@ -425,7 +424,7 @@ class TestPostTrainingmseForMnistONNXFormatFullQuant( data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "mse" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = True is_use_cache_file = False @@ -439,7 +438,7 @@ class TestPostTrainingmseForMnistONNXFormatFullQuant( data_url, data_md5, algo, - weight_round_algo, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, @@ -458,7 +457,7 @@ class TestPostTrainingavgForMnistSkipOP(TestPostTrainingQuantization): data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" algo = "avg" - weight_round_algo = "round" + round_type = "round" quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] is_full_quantize = False is_use_cache_file = False @@ -472,7 +471,7 @@ class TestPostTrainingavgForMnistSkipOP(TestPostTrainingQuantization): data_url, data_md5, algo, - weight_round_algo, + round_type, quantizable_op_type, is_full_quantize, is_use_cache_file, diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index fac0dcc3413..9c076d85fd2 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -242,7 +242,7 @@ class TestPostTrainingQuantization(unittest.TestCase): model_path, quantizable_op_type, algo="KL", - weight_round_algo="round", + round_type="round", is_full_quantize=False, is_use_cache_file=False, is_optimize_model=False, @@ -264,7 +264,7 @@ class TestPostTrainingQuantization(unittest.TestCase): model_dir=model_path, algo=algo, quantizable_op_type=quantizable_op_type, - weight_round_algo=weight_round_algo, + round_type=round_type, is_full_quantize=is_full_quantize, optimize_model=is_optimize_model, onnx_format=onnx_format, @@ -275,7 +275,7 @@ class TestPostTrainingQuantization(unittest.TestCase): def run_test(self, model, algo, - weight_round_algo, + round_type, data_urls, data_md5s, quantizable_op_type, @@ -299,10 +299,9 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start INT8 post training quantization for {0} on {1} images ...". 
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
index fac0dcc3413..9c076d85fd2 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py
@@ -242,7 +242,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
                                  model_path,
                                  quantizable_op_type,
                                  algo="KL",
-                                 weight_round_algo="round",
+                                 round_type="round",
                                  is_full_quantize=False,
                                  is_use_cache_file=False,
                                  is_optimize_model=False,
@@ -264,7 +264,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
                                        model_dir=model_path,
                                        algo=algo,
                                        quantizable_op_type=quantizable_op_type,
-                                       weight_round_algo=weight_round_algo,
+                                       round_type=round_type,
                                        is_full_quantize=is_full_quantize,
                                        optimize_model=is_optimize_model,
                                        onnx_format=onnx_format,
@@ -275,7 +275,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
     def run_test(self,
                  model,
                  algo,
-                 weight_round_algo,
+                 round_type,
                  data_urls,
                  data_md5s,
                  quantizable_op_type,
@@ -299,10 +299,9 @@ class TestPostTrainingQuantization(unittest.TestCase):
         print("Start INT8 post training quantization for {0} on {1} images ...".
               format(model, sample_iterations * batch_size))
         self.generate_quantized_model(model_cache_folder + "/model",
-                                      quantizable_op_type, algo,
-                                      weight_round_algo, is_full_quantize,
-                                      is_use_cache_file, is_optimize_model,
-                                      onnx_format)
+                                      quantizable_op_type, algo, round_type,
+                                      is_full_quantize, is_use_cache_file,
+                                      is_optimize_model, onnx_format)

         print("Start INT8 inference for {0} on {1} images ...".format(
             model, infer_iterations * batch_size))
@@ -330,7 +329,7 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_kl_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "KL"
-        weight_round_algo = "round"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]
@@ -345,7 +344,7 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, weight_round_algo, data_urls, data_md5s,
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
                       quantizable_op_type, is_full_quantize, is_use_cache_file,
                       is_optimize_model, diff_threshold)

@@ -355,7 +354,7 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_avg_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "avg"
-        weight_round_algo = "round"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]
@@ -369,7 +368,7 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, weight_round_algo, data_urls, data_md5s,
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
                       quantizable_op_type, is_full_quantize, is_use_cache_file,
                       is_optimize_model, diff_threshold)

@@ -379,7 +378,7 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_hist_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "hist"
-        weight_round_algo = "round"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]
@@ -393,7 +392,7 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.03
-        self.run_test(model, algo, weight_round_algo, data_urls, data_md5s,
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
                       quantizable_op_type, is_full_quantize, is_use_cache_file,
                       is_optimize_model, diff_threshold)

@@ -403,7 +402,7 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_abs_max_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "abs_max"
-        weight_round_algo = "round"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]
@@ -417,7 +416,7 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
         is_optimize_model = False
         # The accuracy diff of post-training quantization (abs_max) maybe bigger
         diff_threshold = 0.05
-        self.run_test(model, algo, weight_round_algo, data_urls, data_md5s,
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
                       quantizable_op_type, is_full_quantize, is_use_cache_file,
                       is_optimize_model, diff_threshold)

@@ -427,7 +426,7 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_onnx_format_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "avg"
-        weight_round_algo = "round"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]
@@ -444,7 +443,7 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization):
         diff_threshold = 0.05
         self.run_test(model,
                       algo,
-                      weight_round_algo,
+                      round_type,
                       data_urls,
                       data_md5s,
                       quantizable_op_type,
diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py
index 78c5153b742..c79499100ce 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py
@@ -25,7 +25,7 @@ class TestPostTrainingForResnet50(TestPostTrainingQuantization):
     def test_post_training_resnet50(self):
         model = "ResNet-50"
         algo = "min_max"
-        weight_round_algo = "round"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz'
         ]
@@ -35,7 +35,7 @@ class TestPostTrainingForResnet50(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = False
         diff_threshold = 0.025
-        self.run_test(model, algo, weight_round_algo, data_urls, data_md5s,
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
                       quantizable_op_type, is_full_quantize, is_use_cache_file,
                       is_optimize_model, diff_threshold)

@@ -45,7 +45,7 @@ class TestPostTrainingForResnet50ONNXFormat(TestPostTrainingQuantization):
     def test_post_training_resnet50(self):
         model = "ResNet-50"
         algo = "min_max"
-        weight_round_algo = "round"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz'
         ]
@@ -58,7 +58,7 @@ class TestPostTrainingForResnet50ONNXFormat(TestPostTrainingQuantization):
         onnx_format = True
         self.run_test(model,
                       algo,
-                      weight_round_algo,
+                      round_type,
                       data_urls,
                       data_md5s,
                       quantizable_op_type,
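Note on the unit-test hunks that follow: they pin down the two rounding modes selected by the integer round_type attribute. round_type=0 rounds ties to the nearest even value (numpy's default np.round), while round_type=1 rounds ties away from zero via the round_c helper defined near the top of test_fake_quantize_op.py (outside this excerpt). A minimal, self-contained sketch of the difference; round_half_away below is an assumed stand-in for round_c, not the actual helper:

import numpy as np

def round_half_away(x):
    # Round halves away from zero: 0.5 -> 1, -0.5 -> -1 (assumed equivalent of round_c).
    return np.sign(x) * np.floor(np.abs(x) + 0.5)

ties = np.array([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])
print(np.round(ties))         # ties to even:        [-2. -2. -0.  0.  2.  2.]
print(round_half_away(ties))  # ties away from zero: [-3. -2. -1.  1.  2.  3.]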
diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
index e76d5c49d9e..02fff35fec7 100644
--- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
@@ -49,7 +49,7 @@ class TestFakeQuantizeAbsMaxOp(OpTest):
                                dtype,
                                input_shape,
                                distribution,
-                               round_type='TiesToEven'):
+                               round_type='TiesAwayFromZero'):
         input_data = distribution(input_shape).astype(dtype)
         compute_type = get_compute_type(dtype)
         scale = np.max(np.abs(input_data))
@@ -58,12 +58,12 @@ class TestFakeQuantizeAbsMaxOp(OpTest):
         if round_type == 'TiesToEven':
             round_out = np.round(
                 input_data.astype(compute_type) * inv_scale * bnt)
+            output_data = np.clip(round_out, -bnt - 1, bnt)
             self.attrs['round_type'] = 0
         else:
-            round_out = round_c(
+            output_data = round_c(
                 input_data.astype(compute_type) * inv_scale * bnt)
             self.attrs['round_type'] = 1
-        output_data = np.clip(round_out, -bnt - 1, bnt)
         self.inputs = {'X': input_data}
         self.outputs = {'Out': output_data, 'OutScale': scale}
         self.dtype = dtype
@@ -75,7 +75,7 @@ class TestFakeQuantizeAbsMaxOp(OpTest):
     def test_fake_quantize_abs_max_round1(self):
         self._fake_quantize_abs_max(np.float32, (124, 240),
                                     np.random.random,
-                                    round_type='TiesAwayFromZero')
+                                    round_type='TiesToEven')

     def test_fake_quantize_abs_max_float16(self):
         self._fake_quantize_abs_max(np.float16, (124, 240), np.random.random)
@@ -110,12 +110,12 @@ class TestFakeChannelWiseQuantizeAbsMaxOp(OpTest):
         if round_type == 'TiesToEven':
             round_out = np.round(
                 input_data.astype(compute_type) / scale_broadcast * bnt)
+            output_data = np.clip(round_out, -bnt - 1, bnt)
             self.attrs['round_type'] = 0
         else:
-            round_out = round_c(
-                input_data.astype(compute_type) / scale_broadcast * bnt)
+            output_data = round_c(bnt * input_data.astype(compute_type) /
+                                  scale_broadcast)
             self.attrs['round_type'] = 1
-        output_data = np.clip(round_out, -bnt - 1, bnt)
         if quant_axis == 1:
             scale_broadcast = np.transpose(scale_broadcast,
                                            (1, ) + compute_axis)
@@ -169,11 +169,15 @@ class TestFakeQuantizeRangeAbsMaxOp(OpTest):
             round_out = np.round(
                 input_data.astype(compute_type) / out_scale[0] * bnt)
             self.attrs['round_type'] = 0
+            output_data = np.clip(round_out, -bnt - 1, bnt)
         else:
-            round_out = round_c(
-                input_data.astype(compute_type) / out_scale[0] * bnt)
+            if is_test:
+                clip_data = np.clip(input_data, -in_scale, in_scale)
+            else:
+                clip_data = input_data
+            output_data = round_c(
+                clip_data.astype(compute_type) / out_scale[0] * bnt)
             self.attrs['round_type'] = 1
-        output_data = np.clip(round_out, -bnt - 1, bnt)
         self.inputs = {
             'X': input_data,
             'Iter': np.zeros(1).astype(np.int64),
@@ -250,7 +254,7 @@ class TestFakeQuantizeMovingAverageAbsMaxOp(OpTest):
                                               distribution,
                                               dequantize=False,
                                               with_gradient=False,
-                                              round_type='TiesToEven'):
+                                              round_type='TiesAwayFromZero'):
         input_data = distribution(input_shape).astype(dtype)
         compute_type = get_compute_type(dtype)
         bnt = (1 << (self.attrs['bit_length'] - 1)) - 1
@@ -267,12 +271,12 @@ class TestFakeQuantizeMovingAverageAbsMaxOp(OpTest):
         if round_type == 'TiesToEven':
             round_out = np.round(
                 input_data.astype(compute_type) / out_scale * bnt)
+            quant_data = np.clip(round_out, -bnt - 1, bnt)
             self.attrs['round_type'] = 0
         else:
-            round_out = round_c(
+            quant_data = round_c(
                 input_data.astype(compute_type) / out_scale * bnt)
             self.attrs['round_type'] = 1
-        quant_data = np.clip(round_out, -bnt - 1, bnt)
         if dequantize:
             output_data = (quant_data * out_scale / bnt).astype(dtype)
             self.op_type = 'fake_quantize_dequantize_moving_average_abs_max'
@@ -307,10 +311,9 @@ class TestFakeQuantizeMovingAverageAbsMaxOp(OpTest):
                                                    np.random.random)

     def test_fake_quantize_moving_average_abs_max_round1(self):
-        self._fake_quantize_moving_average_abs_max(
-            np.float32, (8, 16, 7, 7),
-            np.random.random,
-            round_type='TiesAwayFromZero')
+        self._fake_quantize_moving_average_abs_max(np.float32, (8, 16, 7, 7),
+                                                   np.random.random,
+                                                   round_type='TiesToEven')

     def test_fake_quantize_dequantize_moving_average_abs_max(self):
         self._fake_quantize_moving_average_abs_max(np.float32, (8, 16, 7, 7),
@@ -329,17 +332,17 @@ class TestFakeQuantizeDequantizeAbsMaxOp(OpTest):
                                           dtype,
                                           input_shape,
                                           distribution,
-                                          round_type='TiesToEven'):
+                                          round_type='TiesAwayFromZero'):
         input_data = distribution(input_shape).astype(dtype)
         scale = np.max(np.abs(input_data)).astype(dtype)
         bnt = (1 << (self.attrs['bit_length'] - 1)) - 1
         if round_type == 'TiesToEven':
             round_out = np.round(input_data / scale * bnt)
+            output_data = np.clip(round_out, -bnt - 1, bnt) * scale / bnt
             self.attrs['round_type'] = 0
         else:
-            round_out = round_c(input_data / scale * bnt)
+            output_data = round_c(input_data / scale * bnt) * scale / bnt
             self.attrs['round_type'] = 1
-        output_data = np.clip(round_out, -bnt - 1, bnt) * scale / bnt
         self.inputs = {'X': input_data}
         self.outputs = {
             'Out': output_data,
@@ -357,7 +360,7 @@ class TestFakeQuantizeDequantizeAbsMaxOp(OpTest):
     def test_fake_quantize_dequantize_abs_max_round1(self):
         self._fake_quantize_dequantize_abs_max(np.float32, (124, 240),
                                                np.random.random,
-                                               round_type='TiesAwayFromZero')
+                                               round_type='TiesToEven')


 class TestChannelWiseFakeQuantizeDequantizeAbsMaxOp(OpTest):
@@ -382,11 +385,13 @@ class TestChannelWiseFakeQuantizeDequantizeAbsMaxOp(OpTest):
         scale_broadcast = np.amax(input_data, axis=compute_axis, keepdims=True)
         if round_type == 'TiesToEven':
             round_out = np.round(bnt * output_data / scale_broadcast)
+            output_data = np.clip(round_out, -bnt - 1,
+                                  bnt) * scale_broadcast / bnt
             self.attrs['round_type'] = 0
         else:
-            round_out = round_c(bnt * output_data / scale_broadcast)
+            output_data = round_c(
+                bnt * output_data / scale_broadcast) * scale_broadcast / bnt
             self.attrs['round_type'] = 1
-        output_data = np.clip(round_out, -bnt - 1, bnt) * scale_broadcast / bnt
         if quant_axis == 1:
             scale_broadcast = np.transpose(scale_broadcast,
                                            (1, ) + compute_axis)
-- 
GitLab
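Editorial note on the behavior these test hunks pin down: for round_type=0 the reference output is computed as round-then-clip (np.round followed by np.clip to [-bnt-1, bnt]), while for round_type=1 the tests either clip the input to the scale range first (the range_abs_max test when is_test) or rely on the abs_max scale already bounding the input, and then round away from zero with no further clip of the integer result. A small numpy sketch of both reference paths, mirroring the abs_max tests above; fake_quant_abs_max and round_half_away are illustrative names introduced here, not functions from the patch:

import numpy as np

def round_half_away(x):
    # Assumed equivalent of the tests' round_c helper (ties away from zero).
    return np.sign(x) * np.floor(np.abs(x) + 0.5)

def fake_quant_abs_max(x, bit_length=8, round_type=0):
    bnt = (1 << (bit_length - 1)) - 1          # 127 for 8-bit quantization
    scale = np.max(np.abs(x))
    if round_type == 0:
        # TiesToEven: round first, then clip the integer result.
        return np.clip(np.round(x / scale * bnt), -bnt - 1, bnt), scale
    # TiesAwayFromZero: clip the input to [-scale, scale], then round.
    # (The clip is a no-op for abs_max scaling, where scale = max|x|.)
    return round_half_away(np.clip(x, -scale, scale) / scale * bnt), scale

x = np.random.uniform(-1, 1, (4, 4)).astype(np.float32)
q0, _ = fake_quant_abs_max(x, round_type=0)   # matches the round_type=0 references
q1, _ = fake_quant_abs_max(x, round_type=1)   # matches the round_type=1 references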