diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index a91e0f520e93c01bc5af09b691af2d5a6deda9f2..4b855fb6470353b5fa11a939a7f5bea39ea9c3a6 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -18,6 +18,52 @@ limitations under the License. */ namespace paddle { namespace operators { +class FakeQuantizeAbsMaxOp : public framework::OperatorWithKernel { + public: + FakeQuantizeAbsMaxOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of FakeQuantizeOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of FakeQuantizeOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("OutScale"), + "Output(Scale) of FakeQuantizeOp should not be null."); + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->SetOutputDim("OutScale", {1}); + ctx->ShareLoD("X", /*->*/ "Out"); + } +}; + +class FakeQuantizeAbsMaxOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) Input is float data type."); + AddOutput("Out", + "(Tensor) Output of quantized low level tensor, " + "but also saved as float data type."); + AddOutput("OutScale", "(Tensor) Current scale"); + AddAttr("bit_length", "(int, default 8)") + .SetDefault(8) + .AddCustomChecker([](const int &bit_length) { + PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16, + "'bit_length' should be between 1 and 16."); + }); + AddComment(R"DOC( +FakeQuantize operator + +$$scale = max(abs(X))$$ +$$range = 2^{bit_length - 1} - 1$$ +$$Out = round(X/scale * range)$$ + +)DOC"); + } +}; + class FakeQuantizeOp : public framework::OperatorWithKernel { public: FakeQuantizeOp(const std::string &type, diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 80f71d85dde39f773cc489fb79effcc775c5010a..ce92f48ad3093e1526c07bf3067bd6000795e074 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -24,6 +24,131 @@ limitations under the License. */ namespace paddle { namespace operators { +template +class FakeQuantizeAbsMaxKernel : public framework::OpKernel { + public: + T FindAbsMax(framework::Tensor* in, int n) const { + T* p = in->mutable_data(platform::CPUPlace()); + T abs_max = (T)0.00000001; + for (int i = 0; i < n; i++) { + T tmp = fabs(p[i]); + if (tmp > abs_max) abs_max = tmp; + } + return T(abs_max); + } + T FindRangeAbsMax(framework::Tensor* scale_list, framework::Tensor* out_scale, + const T& cur_scale, int window_size, + int current_iter) const { + T* sl = scale_list->mutable_data(platform::CPUPlace()); + T remove_tmp = sl[current_iter]; + sl[current_iter] = cur_scale; + T& max_scale = out_scale->mutable_data(platform::CPUPlace())[0]; + if (max_scale < cur_scale) { + max_scale = cur_scale; + } else if (fabs(remove_tmp - max_scale) < 1e-6) { + int size = (current_iter > window_size) ? window_size : current_iter; + max_scale = T(FindAbsMax(scale_list, size)); + } + return max_scale; + } + + T FindMovingAverageAbsMmax(framework::Tensor* in_scale, + framework::Tensor* out_scale, + const T& cur_scale) const { + T* ins = in_scale->mutable_data(platform::CPUPlace()); + T* outs = out_scale->mutable_data(platform::CPUPlace()); + outs[0] = 0.9 * cur_scale + 0.1 * ins[0]; + return T(outs[0]); + } + + virtual void Compute(const framework::ExecutionContext& context) const { + auto* tensor = context.Output("Out"); + auto* in = context.Input("X"); + const bool is_test = context.Attr("is_test"); + tensor->mutable_data(in->place()); + + auto* oms_tensor = context.Output("OutMovingScale"); + oms_tensor->mutable_data(in->place()); + + auto quantize_type = + static_cast(context.Attr("quantize_type")); + if (quantize_type == std::string("range_abs_max")) { + auto* oss_tensor = context.Output("OutScales"); + oss_tensor->mutable_data( + context.Input("InScales")->place()); + auto* oci_tensor = context.Output("OutCurrentIter"); + oci_tensor->mutable_data( + context.Input("InCurrentIter")->place()); + } + + T scale = static_cast(1); + int window_size = context.Attr("window_size"); + int bit_length = context.Attr("bit_length"); + int bin_cnt = std::pow(2, bit_length - 1) - 1; + + auto& dev = + *context.template device_context().eigen_device(); + auto raw_in = framework::EigenVector::Flatten(*in); + if (quantize_type == std::string("abs_max")) { + auto* saving_scale = context.Output("OutMovingScale"); + auto scale_out = framework::EigenVector::Flatten(*saving_scale); + scale_out.device(dev) = raw_in.abs().maximum(); + scale = scale_out(0); + + auto& device_ctx = context.template device_context(); + auto* scale_list = context.Output("OutScales"); + math::SetConstant scalar; + scale_list->mutable_data(context.GetPlace()); + scalar(device_ctx, scale_list, static_cast(0)); + auto* iter = context.Output("OutCurrentIter"); + iter->mutable_data(context.GetPlace()); + scalar(device_ctx, iter, static_cast(0)); + } else if (quantize_type == std::string("range_abs_max")) { + auto* moving_scale = context.Input("InMovingScale"); + if (is_test) { + scale = moving_scale->data()[0]; + } else { + auto* it = context.Input("InCurrentIter"); + auto* iter = context.Output("OutCurrentIter"); + const int* last_iter = it->data(); + int* current_iter = iter->mutable_data(platform::CPUPlace()); + auto* scale_list = context.Output("OutScales"); + auto* saving_scale = + context.Output("OutMovingScale"); + auto scale_out = framework::EigenVector::Flatten(*saving_scale); + scale_out.device(dev) = raw_in.abs().maximum(); + scale = saving_scale->mutable_data(platform::CPUPlace())[0]; + scale = FindRangeAbsMax(scale_list, saving_scale, scale, window_size, + current_iter[0]); + saving_scale->mutable_data(platform::CPUPlace())[0] = scale; + (*current_iter) = (*last_iter) + 1; + } + } else if (quantize_type == std::string("moving_average_abs_max")) { + auto* moving_scale = context.Input("InMovingScale"); + if (is_test) { + scale = moving_scale->data()[0]; + } else { + auto* saving_scale = + context.Output("OutMovingScale"); + auto scale_out = framework::EigenVector::Flatten(*saving_scale); + scale_out.device(dev) = raw_in.abs().maximum(); + scale = saving_scale->mutable_data(platform::CPUPlace())[0]; + scale = FindMovingAverageAbsMmax( + const_cast(moving_scale), saving_scale, scale); + saving_scale->mutable_data(platform::CPUPlace())[0] = scale; + } + } + + Transform trans; + trans(context.template device_context(), in->data(), + in->data() + in->numel(), tensor->mutable_data(in->place()), + ClipFunctor(-scale, scale)); + auto eigen_out = framework::EigenVector::Flatten(*tensor); + auto eigen_in = framework::EigenVector::Flatten(*tensor); + eigen_out.device(dev) = (bin_cnt / scale * eigen_in).round(); + } +}; + using platform::Transform; template