From 81b4fad8b94d85a473875ae0efe634fe46697314 Mon Sep 17 00:00:00 2001
From: achao2013
Date: Fri, 15 Mar 2019 15:40:37 +0800
Subject: [PATCH] add moving average absmax op and fix bug (#15155)

* Add moving average absmax op in quantization-aware training.

---
 paddle/fluid/API.spec                              |   2 +-
 paddle/fluid/operators/fake_quantize_op.cc         | 102 ++++++++++++++++++
 paddle/fluid/operators/fake_quantize_op.cu         |  38 +++++++
 paddle/fluid/operators/fake_quantize_op.h          |  59 +++++++++-
 .../contrib/quantize/quantize_transpiler.py        |  84 +++++++++++++--
 .../slim/quantization/quantization_pass.py         |  86 ++++++++++++++-
 .../slim/tests/test_quantization_pass.py           |  13 +++
 .../tests/unittests/test_fake_quantize_op.py       |  42 ++++++++
 8 files changed, 409 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 6b25d6a14f5..fdd23681af5 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -367,7 +367,7 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init',
 paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7'))
 paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47'))
 paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa'))
-paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
+paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size', 'moving_rate'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000, 0.9)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc
index 70186e5efa2..d51d51b4953 100644
--- a/paddle/fluid/operators/fake_quantize_op.cc
+++ b/paddle/fluid/operators/fake_quantize_op.cc
@@ -81,6 +81,30 @@ struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> {
 
 template struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, float>;
 
+template <typename T>
+struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> {
+  void operator()(const platform::CPUDeviceContext& ctx,
+                  const framework::Tensor& in_accum,
+                  const framework::Tensor& in_state, const T* cur_scale,
+                  const float rate, framework::Tensor* out_state,
+                  framework::Tensor* out_accum, framework::Tensor* out_scale) {
+    T accum = in_accum.data<T>()[0];
+    T state = in_state.data<T>()[0];
+    T scale = cur_scale[0];
+
+    state = rate * state + 1;
+    accum = rate * accum + scale;
+    scale = accum / state;
+
+    out_state->mutable_data<T>(ctx.GetPlace())[0] = state;
+    out_accum->mutable_data<T>(ctx.GetPlace())[0] = accum;
+    out_scale->mutable_data<T>(ctx.GetPlace())[0] = scale;
+  }
+};
+
+template struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext,
+                                               float>;
+
 class FakeQuantizeAbsMaxOp : public framework::OperatorWithKernel {
  public:
   FakeQuantizeAbsMaxOp(const std::string& type,
@@ -255,6 +279,78 @@ $$Out = round(X/scale * range)$$
   }
 };
 
+class FakeQuantizeMovingAverageAbsMaxOp : public framework::OperatorWithKernel {
+ public:
+  FakeQuantizeMovingAverageAbsMaxOp(const std::string& type,
+                                    const framework::VariableNameMap& inputs,
+                                    const framework::VariableNameMap& outputs,
+                                    const framework::AttributeMap& attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(
+        ctx->HasInput("X"),
+        "Input(X) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("Out"),
+        "Output(Out) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("OutScale"),
+                   "Output(OutScale) of FakeQuantizeMovingAverageAbsMaxOp "
+                   "should not be null.");
+    if (ctx->HasOutput("OutState")) {
+      ctx->SetOutputDim("OutState", {1});
+    }
+    if (ctx->HasOutput("OutAccum")) {
+      ctx->SetOutputDim("OutAccum", {1});
+    }
+    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
+    ctx->SetOutputDim("OutScale", {1});
+    ctx->ShareLoD("X", /*->*/ "Out");
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        ctx.Input<framework::LoDTensor>("X")->type(), ctx.device_context());
+  }
+};
+
+class FakeQuantizeMovingAverageAbsMaxOpMaker
+    : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "(Tensor) Input is float data type.");
+    AddInput("InScale", "Last scale.");
+    AddInput("InAccum", "Last accum.").AsDispensable();
+    AddInput("InState", "Last state.").AsDispensable();
+    AddOutput("Out", "(Tensor) Output of quantized low level tensor.");
+    AddOutput("OutScale", "Current scale.");
+    AddOutput("OutState", "(Tensor) state buffer.").AsDispensable();
+    AddOutput("OutAccum", "(Tensor) accum buffer.").AsDispensable();
+    AddAttr<float>("moving_rate", "(float, default 0.9) moving rate.")
+        .SetDefault(0.9);
+    AddAttr<int>("bit_length", "(int, default 8) quantization bit number.")
+        .SetDefault(8)
+        .AddCustomChecker([](const int& bit_length) {
+          PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
+                         "'bit_length' should be between 1 and 16.");
+        });
+    AddAttr<bool>("is_test",
+                  "(bool, default false) Set to true for inference only, false "
+                  "for training. Some layers may run faster when this is true.")
+        .SetDefault(false);
+    AddComment(R"DOC(
+FakeQuantize operator is used in static quantization. It maintains a moving
+average of the absolute maximum of the input and derives the scale from it:
+
+$$scale = (0.9*accum + max(abs(x)))/(0.9*state + 1)$$
+$$range = 2^{bit\_length - 1} - 1$$
+$$Out = round(X/scale * range)$$
+
+)DOC");
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -273,6 +369,12 @@ REGISTER_OPERATOR(fake_quantize_range_abs_max, ops::FakeQuantizeRangeAbsMaxOp,
 REGISTER_OP_CPU_KERNEL(fake_quantize_range_abs_max,
                        ops::FakeQuantizeRangeAbsMaxKernel<CPU, float>);
 
+REGISTER_OPERATOR(fake_quantize_moving_average_abs_max,
+                  ops::FakeQuantizeMovingAverageAbsMaxOp,
+                  ops::FakeQuantizeMovingAverageAbsMaxOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(fake_quantize_moving_average_abs_max,
+                       ops::FakeQuantizeMovingAverageAbsMaxKernel<CPU, float>);
 REGISTER_OPERATOR(fake_channel_wise_quantize_abs_max,
                   ops::FakeChannelWiseQuantizeAbsMaxOp,
                   ops::FakeChannelWiseQuantizeAbsMaxOpMaker,
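For intuition, the update implemented by `FindMovingAverageAbsMaxFunctor` can be sketched in a few lines of NumPy. This is an illustrative re-derivation of the math above; the function name and values are hypothetical, not code from this patch:

```python
import numpy as np

def moving_average_abs_max_update(x, accum, state, rate=0.9):
    # accum keeps a decayed sum of per-batch abs-max values; state keeps
    # the matching decayed sum of ones, so accum / state is a
    # bias-corrected moving average of max(|x|).
    cur_scale = np.max(np.abs(x))
    state = rate * state + 1
    accum = rate * accum + cur_scale
    return accum, state, accum / state

# Starting from the accum = state = 1 initialization used elsewhere in
# this patch, one step with a batch whose abs-max is 2.0:
accum, state, scale = moving_average_abs_max_update(
    np.array([-2.0, 0.5]), accum=1.0, state=1.0)
print(accum, state, scale)  # 2.9 1.9 1.5263...
```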
diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu
index 5da16a7c731..3707f6772ea 100644
--- a/paddle/fluid/operators/fake_quantize_op.cu
+++ b/paddle/fluid/operators/fake_quantize_op.cu
@@ -147,6 +147,41 @@ struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, T> {
 
 template struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, float>;
 
+template <typename T>
+struct FindMovingAverageAbsMaxFunctor<platform::CUDADeviceContext, T> {
+  void operator()(const platform::CUDADeviceContext& ctx,
+                  const framework::Tensor& in_accum,
+                  const framework::Tensor& in_state, const T* cur_scale,
+                  const float rate, framework::Tensor* out_state,
+                  framework::Tensor* out_accum, framework::Tensor* out_scale) {
+    const auto gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+
+    T accum;
+    memory::Copy(platform::CPUPlace(), &accum, gpu_place, in_accum.data<T>(),
+                 sizeof(T), 0);
+    T state;
+    memory::Copy(platform::CPUPlace(), &state, gpu_place, in_state.data<T>(),
+                 sizeof(T), 0);
+    T scale;
+    memory::Copy(platform::CPUPlace(), &scale, gpu_place, cur_scale, sizeof(T),
+                 0);
+
+    state = rate * state + 1;
+    accum = rate * accum + scale;
+    scale = accum / state;
+
+    memory::Copy(gpu_place, out_accum->mutable_data<T>(gpu_place),
+                 platform::CPUPlace(), &accum, sizeof(T), 0);
+    memory::Copy(gpu_place, out_state->mutable_data<T>(gpu_place),
+                 platform::CPUPlace(), &state, sizeof(T), 0);
+    memory::Copy(gpu_place, out_scale->mutable_data<T>(gpu_place),
+                 platform::CPUPlace(), &scale, sizeof(T), 0);
+  }
+};
+
+template struct FindMovingAverageAbsMaxFunctor<platform::CUDADeviceContext,
+                                               float>;
+
 template <typename T>
 struct ClipAndFakeQuantFunctor<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& ctx,
@@ -178,3 +213,6 @@ REGISTER_OP_CUDA_KERNEL(fake_channel_wise_quantize_abs_max,
                         ops::FakeChannelWiseQuantizeAbsMaxKernel<CUDA, float>);
 REGISTER_OP_CUDA_KERNEL(fake_quantize_range_abs_max,
                         ops::FakeQuantizeRangeAbsMaxKernel<CUDA, float>);
+REGISTER_OP_CUDA_KERNEL(
+    fake_quantize_moving_average_abs_max,
+    ops::FakeQuantizeMovingAverageAbsMaxKernel<CUDA, float>);
diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h
index 8b47600e7d9..ec667e89e76 100644
--- a/paddle/fluid/operators/fake_quantize_op.h
+++ b/paddle/fluid/operators/fake_quantize_op.h
@@ -42,12 +42,20 @@ struct FindRangeAbsMaxFunctor {
                   framework::Tensor* scales_arr, framework::Tensor* out_scale);
 };
 
+template <typename DeviceContext, typename T>
+struct FindMovingAverageAbsMaxFunctor {
+  void operator()(const DeviceContext& ctx, const framework::Tensor& in_accum,
+                  const framework::Tensor& in_state, const T* cur_scale,
+                  const float rate, framework::Tensor* out_state,
+                  framework::Tensor* out_accum, framework::Tensor* out_scale);
+};
+
 template <typename DeviceContext, typename T>
 class FakeQuantizeAbsMaxKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* in = context.Input<framework::Tensor>("X");
-
     auto* out = context.Output<framework::Tensor>("Out");
     auto* out_scale = context.Output<framework::Tensor>("OutScale");
     T* out_s = out_scale->mutable_data<T>(context.GetPlace());
@@ -138,5 +146,54 @@ class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel<T> {
   }
 };
 
+template <typename DeviceContext, typename T>
+class FakeQuantizeMovingAverageAbsMaxKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* in = context.Input<framework::Tensor>("X");
+    auto* in_scale = context.Input<framework::Tensor>("InScale");
+    auto* out = context.Output<framework::Tensor>("Out");
+    out->mutable_data<T>(context.GetPlace());
+
+    bool is_test = context.Attr<bool>("is_test");
+    int bit_length = context.Attr<int>("bit_length");
+    int bin_cnt = std::pow(2, bit_length - 1) - 1;
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+
+    // testing
+    if (is_test) {
+      ClipAndFakeQuantFunctor<DeviceContext, T>()(dev_ctx, *in, *in_scale,
+                                                  bin_cnt, out);
+      return;
+    }
+
+    // training
+    auto* in_accum = context.Input<framework::Tensor>("InAccum");
+    auto* in_state = context.Input<framework::Tensor>("InState");
+    auto& allocator =
+        platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx);
+    auto cur_scale = allocator.Allocate(1 * sizeof(T));
+    T* cur_scale_data = static_cast<T*>(cur_scale->ptr());
+
+    FindAbsMaxFunctor<DeviceContext, T>()(dev_ctx, in->data<T>(), in->numel(),
+                                          cur_scale_data);
+
+    auto* out_state = context.Output<framework::Tensor>("OutState");
+    auto* out_accum = context.Output<framework::Tensor>("OutAccum");
+    auto* out_scale = context.Output<framework::Tensor>("OutScale");
+    out_state->mutable_data<T>(context.GetPlace());
+    out_accum->mutable_data<T>(context.GetPlace());
+    out_scale->mutable_data<T>(context.GetPlace());
+    float moving_rate = context.Attr<float>("moving_rate");
+
+    FindMovingAverageAbsMaxFunctor<DeviceContext, T>()(
+        dev_ctx, *in_accum, *in_state, cur_scale_data, moving_rate, out_state,
+        out_accum, out_scale);
+
+    ClipAndFakeQuantFunctor<DeviceContext, T>()(dev_ctx, *in, *out_scale,
+                                                bin_cnt, out);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
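The kernel above branches on `is_test`: at inference it quantizes with the incoming scale and updates no state, while during training it first folds the current batch's abs-max into the moving average. A NumPy sketch of that control flow, reusing the hypothetical update helper from the earlier sketch (not code from this patch):

```python
import numpy as np

def fake_quantize_moving_average_abs_max(x, in_scale, accum, state,
                                         bit_length=8, moving_rate=0.9,
                                         is_test=False):
    bin_cnt = 2**(bit_length - 1) - 1
    if is_test:
        # inference: reuse the frozen scale, leave accum/state untouched
        scale = in_scale
    else:
        # training: fold max(|x|) into the moving average first
        state = moving_rate * state + 1
        accum = moving_rate * accum + np.max(np.abs(x))
        scale = accum / state
    # clip-and-fake-quantize, mirroring ClipAndFakeQuantFunctor
    out = np.round(np.clip(x, -scale, scale) / scale * bin_cnt)
    return out, scale, accum, state
```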
diff --git a/python/paddle/fluid/contrib/quantize/quantize_transpiler.py b/python/paddle/fluid/contrib/quantize/quantize_transpiler.py
index 032d0353ea6..8eddf18cece 100644
--- a/python/paddle/fluid/contrib/quantize/quantize_transpiler.py
+++ b/python/paddle/fluid/contrib/quantize/quantize_transpiler.py
@@ -84,7 +84,8 @@ class QuantizeTranspiler(object):
                  activation_bits=8,
                  activation_quantize_type='abs_max',
                  weight_quantize_type='abs_max',
-                 window_size=10000):
+                 window_size=10000,
+                 moving_rate=0.9):
         """
         Convert and rewrite the fluid Program according to weight and
         activation quantization type.
@@ -117,23 +118,27 @@ class QuantizeTranspiler(object):
         """
         self.weight_bits = weight_bits
         self.activation_bits = activation_bits
-        quant_type = ['abs_max', 'range_abs_max']
+        quant_type = ['abs_max', 'range_abs_max', 'moving_average_abs_max']
         if weight_quantize_type not in quant_type:
             raise ValueError(
                 "Unknown weight_quantize_type: '%s'. It can only be ",
-                "'abs_max' or 'range_abs_max'.", str(weight_quantize_type))
+                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.",
+                str(weight_quantize_type))
         if activation_quantize_type not in quant_type:
             raise ValueError(
                 "Unknown activation_quantize_type : '%s'. It can only be ",
-                "'abs_max' or 'range_abs_max'.", str(activation_quantize_type))
+                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.",
+                str(activation_quantize_type))
 
         self.weight_quantize_type = weight_quantize_type
         self.activation_quantize_type = activation_quantize_type
 
         self.window_size = window_size
+        self.moving_rate = moving_rate
         self.helper = LayerHelper(self.__class__.__name__)
         self.fake_quant_op_types = [
-            'fake_quantize_abs_max', 'fake_quantize_range_abs_max'
+            'fake_quantize_abs_max', 'fake_quantize_range_abs_max',
+            'fake_quantize_moving_average_abs_max'
         ]
         self.fake_dequant_op_types = ['fake_dequantize_max_abs']
         self.is_test = None
@@ -168,6 +173,7 @@ class QuantizeTranspiler(object):
             block_id = block.idx
             # insert quant op and dequant op
             for name in op.input_arg_names:
+                # the input may be shared between several ops
                 if name in dequanted_vars[block_id]:
                     dequant_var = dequanted_vars[block_id][name]
                 else:
@@ -261,6 +267,7 @@ class QuantizeTranspiler(object):
             max_range = None
             scale_var = None
             for name in op.input_arg_names:
+                # rename the op's input to the input of the removed quant op
                 if name in op_in_rename_map[block_id]:
                     op._rename_input(name, op_in_rename_map[block_id][name])
 
@@ -272,8 +279,7 @@ class QuantizeTranspiler(object):
                         max_range = param_range * act_range / scale_v
                     else:
                         assert isinstance(scale_v, Variable)
-                        scale_var = var_scale_map[block_id][_original_var_name(
-                            name)]
+                        scale_var = scale_v
 
             if len(op.output_arg_names) != 1:
                 raise ValueError("Only support one output, but op %s has"
@@ -309,7 +315,7 @@ class QuantizeTranspiler(object):
             op_type = op.type
 
             # insert dequant_op after fc/conv, need to rename
-            # input of the followed ops
+            # inputs of the following ops (of the fc/conv) to the dequant_op
             for name in op.input_arg_names:
                 if name in op_out_rename_map[block_id]:
                     op._rename_input(name,
@@ -389,8 +395,8 @@ class QuantizeTranspiler(object):
             for op in block.ops:
                 args += op.input_arg_names
                 args += op.output_arg_names
-            args = list(set(args))
-            var_names = block.vars.keys()
+            args = list(set(args))  # arg names of all remaining ops
+            var_names = block.vars.keys()  # all var names in the block
             sub_block_remove_vars = []
             for var in var_names:
                 if var not in args:
@@ -471,6 +477,61 @@ class QuantizeTranspiler(object):
 
         return quant_var, scale
 
+    def _insert_quant_moving_average_abs_max_op(self, block, idx, var,
+                                                quant_bits):
+        """Insert fake_quantize_moving_average_abs_max op.
+        """
+        quant_var = block.create_var(
+            name=_quantized_var_name(var.name),
+            type=var.type,
+            shape=var.shape,
+            dtype=var.dtype)
+        state = self.helper.create_global_variable(
+            name=unique_name.generate('state'),
+            persistable=True,
+            dtype=var.dtype,
+            shape=[1])
+        self.helper.set_variable_initializer(
+            state, initializer=Constant(value=1))
+        accum = self.helper.create_global_variable(
+            name=unique_name.generate('accum'),
+            persistable=True,
+            dtype=var.dtype,
+            shape=[1])
+        self.helper.set_variable_initializer(
+            accum, initializer=Constant(value=1))
+        scale = self.helper.create_parameter(
+            attr=ParamAttr(
+                name=_quantized_scale_name(var.name),
+                initializer=Constant(0.001),
+                trainable=False),
+            shape=[1],
+            dtype=var.dtype)
+        scale.stop_gradient = True
+
+        ins = {'X': var, 'InScale': scale}
+        outs = {'Out': quant_var, 'OutScale': scale}
+        if not self.is_test:
+            ins['InState'] = state
+            ins['InAccum'] = accum
+            outs['OutState'] = state
+            outs['OutAccum'] = accum
+
+        attrs = {
+            'bit_length': quant_bits,
+            'moving_rate': self.moving_rate,
+            'is_test': self.is_test
+        }
+
+        quant_op = block._insert_op(
+            idx,
+            type='fake_quantize_moving_average_abs_max',
+            attrs=attrs,
+            inputs=ins,
+            outputs=outs)
+
+        return quant_var, scale
+
     def _insert_quant_op(self, block, idx, var, quant_bits, quant_type):
         """
         Insert fake_quantize_op
@@ -480,6 +541,9 @@ class QuantizeTranspiler(object):
         elif quant_type == 'range_abs_max':
             return self._insert_quant_range_abs_max_op(block, idx, var,
                                                        quant_bits)
+        elif quant_type == 'moving_average_abs_max':
+            return self._insert_quant_moving_average_abs_max_op(block, idx, var,
+                                                                quant_bits)
 
     def _insert_dequant_op(self, block, idx, var, scale, quant_bits):
         """
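With these changes the new strategy is selected by name through the public transpiler API. A minimal usage sketch follows; the network build and training loop are assumed, not shown in this patch:

```python
import paddle.fluid as fluid

main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    img = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
    feat = fluid.layers.conv2d(img, num_filters=8, filter_size=3)
    # ... rest of the network and a loss ...

t = fluid.contrib.QuantizeTranspiler(
    weight_bits=8,
    activation_bits=8,
    activation_quantize_type='moving_average_abs_max',
    weight_quantize_type='abs_max',
    moving_rate=0.9)
t.training_transpile(main, startup)
# After training, t.freeze_program(main, place) folds the learned
# scales into the inference program.
```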
diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
index 622add48430..919db4c78e5 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -38,7 +38,8 @@ class QuantizationTransformPass(object):
                  activation_bits=8,
                  activation_quantize_type='abs_max',
                  weight_quantize_type='abs_max',
-                 window_size=10000):
+                 window_size=10000,
+                 moving_rate=0.9):
         """
         Convert and rewrite the IrGraph according to weight and
         activation quantization type.
@@ -83,19 +84,22 @@ class QuantizationTransformPass(object):
 
         self._weight_bits = weight_bits
         self._activation_bits = activation_bits
-        quant_type = ['abs_max', 'range_abs_max']
+        quant_type = ['abs_max', 'range_abs_max', 'moving_average_abs_max']
         if activation_quantize_type not in quant_type:
             raise ValueError(
                 "Unknown activation_quantize_type : '%s'. It can only be ",
-                "'abs_max' or 'range_abs_max'.", str(activation_quantize_type))
+                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.",
+                str(activation_quantize_type))
         if weight_quantize_type not in quant_type:
             raise ValueError(
                 "Unknown weight_quantize_type: '%s'. It can only be ",
-                "'abs_max' or 'range_abs_max'.", str(weight_quantize_type))
+                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.",
+                str(weight_quantize_type))
 
         self._activation_quantize_type = activation_quantize_type
         self._weight_quantize_type = weight_quantize_type
         self._window_size = window_size
+        self._moving_rate = moving_rate
 
         self._need_initialized = collections.OrderedDict()
         self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
@@ -222,6 +226,9 @@ class QuantizationTransformPass(object):
         elif quant_type == 'range_abs_max':
             return self._insert_quant_range_abs_max_op(graph, var_node,
                                                        quant_bits)
+        elif quant_type == 'moving_average_abs_max':
+            return self._insert_quant_moving_average_abs_max_op(graph, var_node,
+                                                                quant_bits)
 
     def _insert_quant_abs_max_op(self, graph, var_node, quant_bits):
         """
@@ -309,6 +316,74 @@ class QuantizationTransformPass(object):
 
         return quant_var_node, scale_out_node
 
+    def _insert_quant_moving_average_abs_max_op(self, graph, var_node,
+                                                quant_bits):
+        """Insert fake_quantize_moving_average_abs_max op.
+        """
+        quant_var_node = graph.create_var_node(
+            name=self._quantized_var_name(var_node.name()),
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
+        scale_in_node = graph.create_persistable_node(
+            name=self._quantized_scale_name(var_node.name()),
+            var_type=core.VarDesc.VarType.LOD_TENSOR,
+            shape=[1],
+            var_dtype=var_node.dtype())
+        self._need_initialized[scale_in_node.var()] = Constant(value=0.001)
+
+        scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
+        ins = {'X': var_node, 'InScale': scale_in_node}
+        outs = {'Out': quant_var_node, 'OutScale': scale_out_node}
+        if not self._is_test:
+            state_in_node = graph.create_persistable_node(
+                name=unique_name.generate('state'),
+                var_type=core.VarDesc.VarType.LOD_TENSOR,
+                var_dtype=var_node.dtype(),
+                shape=[1])
+            self._need_initialized[state_in_node.var()] = Constant(value=1)
+            accum_in_node = graph.create_persistable_node(
+                name=unique_name.generate('accum'),
+                var_type=core.VarDesc.VarType.LOD_TENSOR,
+                var_dtype=var_node.dtype(),
+                shape=[1])
+            self._need_initialized[accum_in_node.var()] = Constant(value=1)
+            state_out_node = graph.create_var_node_from_desc(
+                state_in_node.var())
+            accum_out_node = graph.create_var_node_from_desc(
+                accum_in_node.var())
+
+            ins['InState'] = state_in_node
+            ins['InAccum'] = accum_in_node
+            outs['OutState'] = state_out_node
+            outs['OutAccum'] = accum_out_node
+
+        attrs = {
+            'bit_length': quant_bits,
+            'moving_rate': self._moving_rate,
+            'is_test': self._is_test,
+            'op_role': core.op_proto_and_checker_maker.OpRole.Forward
+        }
+
+        quant_op_node = graph.create_op_node(
+            op_type='fake_quantize_moving_average_abs_max',
+            attrs=attrs,
+            inputs=ins,
+            outputs=outs)
+
+        graph.link_to(var_node, quant_op_node)
+        graph.link_to(scale_in_node, quant_op_node)
+        graph.link_to(quant_op_node, quant_var_node)
+        graph.link_to(quant_op_node, scale_out_node)
+
+        if not self._is_test:
+            graph.link_to(state_in_node, quant_op_node)
+            graph.link_to(accum_in_node, quant_op_node)
+            graph.link_to(quant_op_node, state_out_node)
+            graph.link_to(quant_op_node, accum_out_node)
+
+        return quant_var_node, scale_out_node
+
     def _insert_dequant_op(self, graph, var_node, scale_var_node, quant_bits):
         """
         Insert fake_dequantize_op in the graph.
@@ -389,7 +464,8 @@ class QuantizationFreezePass(object):
         self._weight_quantize_type = weight_quantize_type
         self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
         self._fake_quant_op_names = [
-            'fake_quantize_abs_max', 'fake_quantize_range_abs_max'
+            'fake_quantize_abs_max', 'fake_quantize_range_abs_max',
+            'fake_quantize_moving_average_abs_max'
        ]
         self._fake_dequant_op_names = ['fake_dequantize_max_abs']
         self._op_input_rename_map = collections.OrderedDict()
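The IrGraph pass is exercised the same way as the existing quantize types. A sketch of the setup, with constructor keywords (`scope`, `program_exe`) assumed from the pass API of this branch and the program construction elided:

```python
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass

main = fluid.Program()
startup = fluid.Program()
# ... build the network in `main` as in the tests below ...

exe = fluid.Executor(fluid.CPUPlace())
graph = IrGraph(core.Graph(main.desc), for_test=False)
transform_pass = QuantizationTransformPass(
    scope=fluid.global_scope(),
    program_exe=exe,
    activation_quantize_type='moving_average_abs_max',
    moving_rate=0.9)
transform_pass.apply(graph)
```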
diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
index c6a301b7f41..0b4b2a285f5 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
@@ -164,6 +164,9 @@ class TestQuantizationTransformPass(unittest.TestCase):
     def test_linear_fc_quant_range_abs_max(self):
         self.linear_fc_quant('range_abs_max', for_ci=True)
 
+    def test_linear_fc_quant_moving_average_abs_max(self):
+        self.linear_fc_quant('moving_average_abs_max', for_ci=True)
+
     def residual_block_quant(self, quant_type, for_ci=False):
         main = fluid.Program()
         startup = fluid.Program()
@@ -201,6 +204,9 @@ class TestQuantizationTransformPass(unittest.TestCase):
     def test_residual_block_range_abs_max(self):
         self.residual_block_quant('range_abs_max', for_ci=True)
 
+    def test_residual_block_moving_average_abs_max(self):
+        self.residual_block_quant('moving_average_abs_max', for_ci=True)
+
 
 class TestQuantizationFreezePass(unittest.TestCase):
     def freeze_graph(self, use_cuda, seed, quant_type, for_ci=False):
@@ -380,11 +386,18 @@ class TestQuantizationFreezePass(unittest.TestCase):
         with fluid.unique_name.guard():
             self.freeze_graph(
                 True, seed=1, quant_type='range_abs_max', for_ci=True)
+            self.freeze_graph(
+                True,
+                seed=1,
+                quant_type='moving_average_abs_max',
+                for_ci=True)
 
     def test_freeze_graph_cpu_static(self):
         with fluid.unique_name.guard():
             self.freeze_graph(
                 False, seed=2, quant_type='range_abs_max', for_ci=True)
+            self.freeze_graph(
+                False, seed=2, quant_type='moving_average_abs_max',
+                for_ci=True)
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
index 90a90112bd5..cf8f01edb9a 100644
--- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle.fluid.core as core
 
 
 class TestFakeQuantizeOp(OpTest):
@@ -75,6 +76,7 @@ class TestFakeQuantizeRangeAbsMaxOp(OpTest):
             'InScale': np.zeros(1).astype("float32")
         }
         scale = np.max(np.abs(self.inputs['X'])).astype("float32")
+
         out_scales = np.zeros(self.attrs['window_size']).astype("float32")
         out_scales[0] = scale
         self.outputs = {
@@ -88,6 +90,46 @@ class TestFakeQuantizeRangeAbsMaxOp(OpTest):
         self.check_output()
 
 
+class TestFakeQuantizeMovingOp(OpTest):
+    def setUp(self):
+        self.op_type = "fake_quantize_moving_average_abs_max"
+        self.attrs = {
+            'bit_length': int(5),
+            'moving_rate': float(0.9),
+            'is_test': False
+        }
+        accum = np.zeros(1).astype("float32")
+        accum[0] = 1
+        state = np.zeros(1).astype("float32")
+        state[0] = 1
+        scale = np.zeros(1).astype("float32")
+        scale[0] = 0.001
+        self.inputs = {
+            'X': np.random.random((8, 16, 7, 7)).astype("float32"),
+            'InScale': scale,
+            'InAccum': accum,
+            'InState': state,
+        }
+
+        out_accum = np.zeros(1).astype("float32")
+        out_state = np.zeros(1).astype("float32")
+        out_scale = np.zeros(1).astype("float32")
+        out_accum[0] = self.attrs['moving_rate'] * accum[0] + np.max(
+            np.abs(self.inputs['X'])).astype("float32")
+        out_state[0] = self.attrs['moving_rate'] * state[0] + 1
+        out_scale = out_accum / out_state
+        self.outputs = {
+            'Out': np.round(self.inputs['X'] / out_scale * (
+                (1 << (self.attrs['bit_length'] - 1)) - 1)),
+            'OutAccum': out_accum,
+            'OutState': out_state,
+            'OutScale': out_scale,
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
 class TestFakeQuantizeRangeAbsMaxOp2(OpTest):
     def setUp(self):
         self.op_type = "fake_quantize_range_abs_max"
--
GitLab