diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index c198c436b23e06548f1efd25f793e694ff8a8b06..c4ae555d4c529ed3dfbbd002ce239bdd3e8c6fe3 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -125,40 +125,40 @@ class Optimizer: Examples: .. code-block:: python - #Take the subclass adam as an example - import paddle - linear = paddle.nn.Linear(10, 10) - inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear(inp) - loss = paddle.mean(out) - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters()) - loss.backward() - adam.step() - adam.clear_grad() - - #Take the subclass sgd as an example - #optimize parameters in linear_1 and linear2 in different options. - #Note that the learning_rate of linear_2 is 0.01. - linear_1 = paddle.nn.Linear(10, 10) - linear_2 = paddle.nn.Linear(10, 10) - inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear_1(inp) - out = linear_2(out) - loss = paddle.mean(out) - sgd = paddle.optimizer.SGD( - learning_rate=0.1, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1 - }], - weight_decay=0.01) - loss.backward() - sgd.step() - sgd.clear_grad() + >>> # Take the subclass adam as an example + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) + >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> adam = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters()) + >>> loss.backward() + >>> adam.step() + >>> adam.clear_grad() + + >>> #Take the subclass sgd as an example + >>> #optimize parameters in linear_1 and linear2 in different options. + >>> #Note that the learning_rate of linear_2 is 0.01. + >>> linear_1 = paddle.nn.Linear(10, 10) + >>> linear_2 = paddle.nn.Linear(10, 10) + >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear_1(inp) + >>> out = linear_2(out) + >>> loss = paddle.mean(out) + >>> sgd = paddle.optimizer.SGD( + ... learning_rate=0.1, + ... parameters=[{ + ... 'params': linear_1.parameters() + ... }, { + ... 'params': linear_2.parameters(), + ... 'weight_decay': 0.001, + ... 'learning_rate': 0.1 + ... }], + ... weight_decay=0.01) + >>> loss.backward() + >>> sgd.step() + >>> sgd.clear_grad() """ @@ -343,23 +343,23 @@ class Optimizer: Examples: .. code-block:: python - import paddle + >>> import paddle - emb = paddle.nn.Embedding(10, 10) + >>> emb = paddle.nn.Embedding(10, 10) - layer_state_dict = emb.state_dict() - paddle.save(layer_state_dict, "emb.pdparams") + >>> layer_state_dict = emb.state_dict() + >>> paddle.save(layer_state_dict, "emb.pdparams") - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, - parameters=emb.parameters()) - opt_state_dict = adam.state_dict() - paddle.save(opt_state_dict, "adam.pdopt") + >>> scheduler = paddle.optimizer.lr.NoamDecay( + ... d_model=0.01, warmup_steps=100, verbose=True) + >>> adam = paddle.optimizer.Adam( + ... learning_rate=scheduler, + ... 
parameters=emb.parameters()) + >>> opt_state_dict = adam.state_dict() + >>> paddle.save(opt_state_dict, "adam.pdopt") - opti_state_dict = paddle.load("adam.pdopt") - adam.set_state_dict(opti_state_dict) + >>> opti_state_dict = paddle.load("adam.pdopt") + >>> adam.set_state_dict(opti_state_dict) ''' if isinstance(self._learning_rate, LRScheduler): @@ -500,23 +500,22 @@ class Optimizer: Examples: .. code-block:: python - import paddle - linear = paddle.nn.Linear(10, 10) - - adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) - - # set learning rate manually by python float value - lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] - for i in range(5): - adam.set_lr(lr_list[i]) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # Print: - # current lr is 0.2 - # current lr is 0.3 - # current lr is 0.4 - # current lr is 0.5 - # current lr is 0.6 + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) + + >>> adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) + + >>> # set learning rate manually by python float value + >>> lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] + >>> for i in range(5): + ... adam.set_lr(lr_list[i]) + ... lr = adam.get_lr() + ... print("current lr is {}".format(lr)) + current lr is 0.2 + current lr is 0.3 + current lr is 0.4 + current lr is 0.5 + current lr is 0.6 """ if not isinstance(value, (int, float)): @@ -570,24 +569,24 @@ class Optimizer: Examples: .. code-block:: python - import paddle - linear = paddle.nn.Linear(10, 10) + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) - adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) + >>> adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) - # set learning rate manually by class LRScheduler - scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2,4,6], gamma=0.8) - adam.set_lr_scheduler(scheduler) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # current lr is 0.5 + >>> # set learning rate manually by class LRScheduler + >>> scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2,4,6], gamma=0.8) + >>> adam.set_lr_scheduler(scheduler) + >>> lr = adam.get_lr() + >>> print("current lr is {}".format(lr)) + current lr is 0.5 - # set learning rate manually by another LRScheduler - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=5, gamma=0.6) - adam.set_lr_scheduler(scheduler) - lr = adam.get_lr() - print("current lr is {}".format(lr)) - # current lr is 0.1 + >>> # set learning rate manually by another LRScheduler + >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=5, gamma=0.6) + >>> adam.set_lr_scheduler(scheduler) + >>> lr = adam.get_lr() + >>> print("current lr is {}".format(lr)) + current lr is 0.1 """ from paddle.optimizer.lr import LRScheduler @@ -611,50 +610,79 @@ class Optimizer: Examples: .. 
code-block:: python - # train on default dynamic graph mode - import paddle - import numpy as np - emb = paddle.nn.Embedding(10, 3) - - ## example1: LRScheduler is not used, return the same value is all the same - adam = paddle.optimizer.Adam(0.01, parameters = emb.parameters()) - for batch in range(10): - input = paddle.randint(low=0, high=5, shape=[5]) - out = emb(input) - out.backward() - print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.01 - adam.step() - - ## example2: StepDecay is used, return the scheduled learning rate - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - adam = paddle.optimizer.Adam(scheduler, parameters = emb.parameters()) - for batch in range(10): - input = paddle.randint(low=0, high=5, shape=[5]) - out = emb(input) - out.backward() - print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.5->0.05... - adam.step() - scheduler.step() - - # train on static graph mode - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 10]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) - adam = paddle.optimizer.Adam(learning_rate=scheduler) - adam.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for batch in range(10): - print("Learning rate of step{}: {}", adam.get_lr()) # 0.5->0.05->0.005... - out = exe.run(main_prog, feed={'x': np.random.randn(3, 10).astype('float32')}) - scheduler.step() - + >>> # train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + >>> emb = paddle.nn.Embedding(10, 3) + + >>> ## example1: LRScheduler is not used, return the same value is all the same + >>> adam = paddle.optimizer.Adam(0.01, parameters = emb.parameters()) + >>> for batch in range(10): + ... input = paddle.randint(low=0, high=5, shape=[5]) + ... out = emb(input) + ... out.backward() + ... print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.01 + ... adam.step() + Learning rate of step0: 0.01 + Learning rate of step1: 0.01 + Learning rate of step2: 0.01 + Learning rate of step3: 0.01 + Learning rate of step4: 0.01 + Learning rate of step5: 0.01 + Learning rate of step6: 0.01 + Learning rate of step7: 0.01 + Learning rate of step8: 0.01 + Learning rate of step9: 0.01 + + >>> ## example2: StepDecay is used, return the scheduled learning rate + >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) + >>> adam = paddle.optimizer.Adam(scheduler, parameters = emb.parameters()) + >>> for batch in range(10): + ... input = paddle.randint(low=0, high=5, shape=[5]) + ... out = emb(input) + ... out.backward() + ... print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.5->0.05... + ... adam.step() + ... 
scheduler.step() + Learning rate of step0: 0.5 + Learning rate of step1: 0.5 + Learning rate of step2: 0.05 + Learning rate of step3: 0.05 + Learning rate of step4: 0.005000000000000001 + Learning rate of step5: 0.005000000000000001 + Learning rate of step6: 0.0005000000000000001 + Learning rate of step7: 0.0005000000000000001 + Learning rate of step8: 5.000000000000001e-05 + Learning rate of step9: 5.000000000000001e-05 + + >>> # train on static graph mode + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 10]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=2, gamma=0.1) + ... adam = paddle.optimizer.Adam(learning_rate=scheduler) + ... adam.minimize(loss) + + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for batch in range(10): + ... print("Learning rate of step{}: {}".format(batch, adam.get_lr())) # 0.5->0.05->0.005... + ... out = exe.run(main_prog, feed={'x': np.random.randn(3, 10).astype('float32')}) + ... scheduler.step() + Learning rate of step0: 0.5 + Learning rate of step1: 0.5 + Learning rate of step2: 0.05 + Learning rate of step3: 0.05 + Learning rate of step4: 0.005000000000000001 + Learning rate of step5: 0.005000000000000001 + Learning rate of step6: 0.0005000000000000001 + Learning rate of step7: 0.0005000000000000001 + Learning rate of step8: 5.000000000000001e-05 + Learning rate of step9: 5.000000000000001e-05 """ if isinstance(self._learning_rate, float): return self._learning_rate @@ -1146,17 +1174,17 @@ class Optimizer: Examples: .. code-block:: python - import paddle - x = paddle.arange(26, dtype="float32").reshape([2, 13]) - - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(x) - out.backward() - adam.step() - adam.clear_grad() + >>> import paddle + >>> x = paddle.arange(26, dtype="float32").reshape([2, 13]) + + >>> linear = paddle.nn.Linear(13, 5) + >>> # This can be any optimizer supported by dygraph. + >>> adam = paddle.optimizer.Adam(learning_rate = 0.01, + ... parameters = linear.parameters()) + >>> out = linear(x) + >>> out.backward() + >>> adam.step() + >>> adam.clear_grad() """ act_no_grad_set = None if framework.in_dygraph_mode(): @@ -1218,16 +1246,16 @@ class Optimizer: Examples: .. code-block:: python - import paddle + >>> import paddle - inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1) - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - optimizer = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters()) - params_grads = optimizer.backward(loss) - optimizer.apply_gradients(params_grads) + >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1) + >>> linear = paddle.nn.Linear(10, 10) + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> optimizer = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters()) + >>> params_grads = optimizer.backward(loss) + >>> optimizer.apply_gradients(params_grads) """ @@ -1436,17 +1464,17 @@ class Optimizer: Examples: .. 
code-block:: python - import paddle + >>> import paddle - a = paddle.arange(26, dtype="float32").reshape([2, 13]) - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() + >>> a = paddle.arange(26, dtype="float32").reshape([2, 13]) + >>> linear = paddle.nn.Linear(13, 5) + >>> # This can be any optimizer supported by dygraph. + >>> adam = paddle.optimizer.Adam(learning_rate = 0.01, + ... parameters = linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adam.step() + >>> adam.clear_grad() """ param_list = [] @@ -1494,21 +1522,21 @@ class Optimizer: Examples: .. code-block:: python - import paddle - linear = paddle.nn.Linear(10, 10) - input = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear(input) - loss = paddle.mean(out) + >>> import paddle + >>> linear = paddle.nn.Linear(10, 10) + >>> input = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear(input) + >>> loss = paddle.mean(out) - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") + >>> beta1 = paddle.to_tensor([0.9], dtype="float32") + >>> beta2 = paddle.to_tensor([0.99], dtype="float32") - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters(), - weight_decay=0.01) - loss.backward() - adam.minimize(loss) - adam.clear_grad() + >>> adam = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters(), + ... weight_decay=0.01) + >>> loss.backward() + >>> adam.minimize(loss) + >>> adam.clear_grad() """ assert isinstance(loss, Variable), "The loss should be an Tensor." @@ -1562,17 +1590,17 @@ class Optimizer: Examples: .. code-block:: python - import paddle - - a = paddle.arange(26, dtype="float32").reshape([2, 13]) - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() + >>> import paddle + + >>> a = paddle.arange(26, dtype="float32").reshape([2, 13]) + >>> linear = paddle.nn.Linear(13, 5) + >>> # This can be any optimizer supported by dygraph. + >>> adam = paddle.optimizer.Adam(learning_rate = 0.01, + ... parameters = linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adam.step() + >>> adam.clear_grad() """ if paddle.fluid.dygraph.base.in_declarative_mode(): self._declarative_step() diff --git a/python/paddle/quantization/config.py b/python/paddle/quantization/config.py index dc8ea16e53b6344a6711625ef600e5b37415ad64..cb8db9206e6db760b16da46f8b236baa5a2ed9b0 100644 --- a/python/paddle/quantization/config.py +++ b/python/paddle/quantization/config.py @@ -70,12 +70,15 @@ class QuantConfig: Examples: .. 
code-block:: python - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=quanter, weight=quanter) - print(q_config) + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=quanter, weight=quanter) + >>> print(q_config) + Global config: + activation: FakeQuanterWithAbsMaxObserver(name=None,moving_rate=0.9,bit_length=8,dtype=float32) + weight: FakeQuanterWithAbsMaxObserver(name=None,moving_rate=0.9,bit_length=8,dtype=float32) """ @@ -100,31 +103,36 @@ class QuantConfig: weight: QuanterFactory = None, ): r""" - Set the quantization config by layer. It has the highest priority among - all the setting methods. - - Args: - layer(Union[Layer, list]): One or a list of layers. - activation(QuanterFactory): Quanter used for activations. - weight(QuanterFactory): Quanter used for weights. - - Examples: - .. code-block:: python - - import paddle - from paddle.nn import Linear - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Linear(576, 120) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) - print(q_config) + Set the quantization config by layer. It has the highest priority among + all the setting methods. + + Args: + layer(Union[Layer, list]): One or a list of layers. + activation(QuanterFactory): Quanter used for activations. + weight(QuanterFactory): Quanter used for weights. + + Examples: + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Linear(576, 120) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> print(q_config) + Global config: + None + Layer prefix config: + {'linear_0': } """ if isinstance(layer, list): @@ -144,31 +152,36 @@ class QuantConfig: weight: QuanterFactory = None, ): r""" - Set the quantization config by full name of layer. Its priority is - lower than `add_layer_config`. - - Args: - layer_name(Union[str, list]): One or a list of layers' full name. - activation(QuanterFactory): Quanter used for activations. - weight(QuanterFactory): Quanter used for weights. - - Examples: - .. 
code-block:: python - - import paddle - from paddle.nn import Linear - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Linear(576, 120) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_name_config([model.fc.full_name()], activation=quanter, weight=quanter) - print(q_config) + Set the quantization config by full name of layer. Its priority is + lower than `add_layer_config`. + + Args: + layer_name(Union[str, list]): One or a list of layers' full name. + activation(QuanterFactory): Quanter used for activations. + weight(QuanterFactory): Quanter used for weights. + + Examples: + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Linear(576, 120) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_name_config([model.fc.full_name()], activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> print(q_config) + Global config: + None + Layer prefix config: + {'linear_0': } """ if isinstance(layer_name, str): @@ -198,22 +211,27 @@ class QuantConfig: weight(QuanterFactory): Quanter used for weights. Examples: - .. code-block:: python - - import paddle - from paddle.nn import Linear - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Linear(576, 120) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_type_config([Linear], activation=quanter, weight=quanter) - print(q_config) + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Linear(576, 120) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_type_config([Linear], activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> print(q_config) + Global config: + None + Layer type config: + {: } """ if isinstance(layer_type, type) and issubclass( @@ -240,18 +258,18 @@ class QuantConfig: target(type): The type of layers that will be converted to. Examples: - .. code-block:: python - - from paddle.nn import Conv2D - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - class CustomizedQuantedConv2D: - def forward(self, x): - pass - # add some code for quantization simulation - q_config.add_qat_layer_mapping(Conv2D, CustomizedQuantedConv2D) + .. 
code-block:: python + + >>> from paddle.nn import Conv2D + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> class CustomizedQuantedConv2D: + ... def forward(self, x): + ... pass + ... # add some code for quantization simulation + >>> q_config.add_qat_layer_mapping(Conv2D, CustomizedQuantedConv2D) """ assert isinstance(source, type) and issubclass( source, paddle.nn.Layer @@ -272,13 +290,13 @@ class QuantConfig: layer_type(type): The type of layer to be declared as leaf. Examples: - .. code-block:: python + .. code-block:: python - from paddle.nn import Sequential - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - q_config = QuantConfig(activation=None, weight=None) - q_config.add_customized_leaf(Sequential) + >>> from paddle.nn import Sequential + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_customized_leaf(Sequential) """ self._customized_leaves.append(layer_type) @@ -379,22 +397,22 @@ class QuantConfig: model(Layer): The model to be specified by the config. Examples: - .. code-block:: python - - import paddle - from paddle.nn import Linear, Sequential - from paddle.quantization import QuantConfig - from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.fc = Sequential(Linear(576, 120),Linear(576, 120)) - model = Model() - quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) - q_config = QuantConfig(activation=None, weight=None) - q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) - q_config._specify(model) + .. code-block:: python + + >>> import paddle + >>> from paddle.nn import Linear, Sequential + >>> from paddle.quantization import QuantConfig + >>> from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = Sequential(Linear(576, 120),Linear(576, 120)) + >>> model = Model() + >>> quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + >>> q_config = QuantConfig(activation=None, weight=None) + >>> q_config.add_layer_config([model.fc], activation=quanter, weight=quanter) + >>> q_config._specify(model) """ self._model = model self._specify_helper(self._model) diff --git a/python/paddle/quantization/factory.py b/python/paddle/quantization/factory.py index d7ad2c355ba47f48d312dbd873a8aa72f552b912..3a1205c38bec17e0453a2335d060288803d25e7b 100644 --- a/python/paddle/quantization/factory.py +++ b/python/paddle/quantization/factory.py @@ -83,21 +83,22 @@ def quanter(class_name): Examples: .. 
code-block:: python - # Given codes in ./customized_quanter.py - from paddle.quantization import quanter - from paddle.quantization import BaseQuanter - @quanter("CustomizedQuanter") - class CustomizedQuanterLayer(BaseQuanter): - def __init__(self, arg1, kwarg1=None): - pass - - # Used in ./test.py - # from .customized_quanter import CustomizedQuanter - from paddle.quantization import QuantConfig - arg1_value = "test" - kwarg1_value = 20 - quanter = CustomizedQuanter(arg1_value, kwarg1=kwarg1_value) - q_config = QuantConfig(activation=quanter, weight=quanter) + >>> # doctest: +SKIP + >>> # Given codes in ./customized_quanter.py + >>> from paddle.quantization import quanter + >>> from paddle.quantization import BaseQuanter + >>> @quanter("CustomizedQuanter") + >>> class CustomizedQuanterLayer(BaseQuanter): + ... def __init__(self, arg1, kwarg1=None): + ... pass + + >>> # Used in ./test.py + >>> # from .customized_quanter import CustomizedQuanter + >>> from paddle.quantization import QuantConfig + >>> arg1_value = "test" + >>> kwarg1_value = 20 + >>> quanter = CustomizedQuanter(arg1_value, kwarg1=kwarg1_value) + >>> q_config = QuantConfig(activation=quanter, weight=quanter) """ diff --git a/python/paddle/quantization/imperative/qat.py b/python/paddle/quantization/imperative/qat.py index 591dac54507f54f27b4d18872a9bcbba6fc22f5e..f261f4cabe42a75a2b60a4a46b8b27cd546289bf 100644 --- a/python/paddle/quantization/imperative/qat.py +++ b/python/paddle/quantization/imperative/qat.py @@ -135,79 +135,81 @@ class ImperativeQuantAware: during training. If this attribute is not sets or the attribute is false, the Layer would be qunatized in training. - Examples 1: - .. code-block:: python - - import paddle - from paddle.static.quantization \ - import ImperativeQuantAware - from paddle.vision.models \ - import resnet - - model = resnet.resnet50(pretrained=True) - - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') - - # Add the fake quant logical. - # The original model will be rewrite. - # The outscale of outputs in supportted layers would be calculated. - imperative_qat.quantize(model) - - # Fine-tune the quantized model - # ... - - # Save quant model for the inference. - imperative_qat.save_quantized_model( - layer=model, - model_path="./resnet50_qat", - input_spec=[ - paddle.static.InputSpec( - shape=[None, 3, 224, 224], dtype='float32')]) - - Examples 2: - .. code-block:: python - - import paddle - from paddle.static.quantization \ - import ImperativeQuantAware - - class ImperativeModel(paddle.nn.Layer): - def __init__(self): - super().__init__() - # self.linear_0 would skip the quantization. - self.linear_0 = paddle.nn.Linear(784, 400) - self.linear_0.skip_quant = True - - # self.linear_1 would not skip the quantization. - self.linear_1 = paddle.nn.Linear(400, 10) - self.linear_1.skip_quant = False - - def forward(self, inputs): - x = self.linear_0(inputs) - x = self.linear_1(inputs) - return x - - model = ImperativeModel() - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') - - # Add the fake quant logical. - # The original model will be rewrite. - # - # There is only one Layer(self.linear1) would be added the - # fake quant logical. - imperative_qat.quantize(model) - - # Fine-tune the quantized model - # ... - - # Save quant model for the inference. 
- imperative_qat.save_quantized_model( - layer=model, - model_path="./imperative_model_qat") + Examples: + .. code-block:: python + + >>> import paddle + >>> from paddle.static.quantization import ( + ... ImperativeQuantAware, + ... ) + >>> from paddle.vision.models import ( + ... resnet, + ... ) + + >>> model = resnet.resnet50(pretrained=True) + + >>> imperative_qat = ImperativeQuantAware( + ... weight_quantize_type='abs_max', + ... activation_quantize_type='moving_average_abs_max') + + >>> # Add the fake quant logic. + >>> # The original model will be rewritten. + >>> # The outscale of outputs in supported layers will be calculated. + >>> imperative_qat.quantize(model) + + >>> # Fine-tune the quantized model + >>> # ... + + >>> # Save the quantized model for inference. + >>> imperative_qat.save_quantized_model( + ... layer=model, + ... model_path="./resnet50_qat", + ... input_spec=[ + ... paddle.static.InputSpec( + ... shape=[None, 3, 224, 224], dtype='float32')]) + +
+ .. code-block:: python + + >>> import paddle + >>> from paddle.static.quantization import ( + ... ImperativeQuantAware, + ... ) + + >>> class ImperativeModel(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... # self.linear_0 would skip the quantization. + ... self.linear_0 = paddle.nn.Linear(784, 400) + ... self.linear_0.skip_quant = True + ... + ... # self.linear_1 would not skip the quantization. + ... self.linear_1 = paddle.nn.Linear(400, 10) + ... self.linear_1.skip_quant = False + ... + ... def forward(self, inputs): + ... x = self.linear_0(inputs) + ... x = self.linear_1(inputs) + ... return x + + >>> model = ImperativeModel() + >>> imperative_qat = ImperativeQuantAware( + ... weight_quantize_type='abs_max', + ... activation_quantize_type='moving_average_abs_max') + + >>> # Add the fake quant logic. + >>> # The original model will be rewritten. + >>> # + >>> # Only one layer (self.linear_1) will have the fake quant + >>> # logic added. + >>> imperative_qat.quantize(model) + + >>> # Fine-tune the quantized model + >>> # ... + + >>> # Save the quantized model for inference. + >>> imperative_qat.save_quantized_model( + ... layer=model, + ... model_path="./imperative_model_qat") """ super().__init__() self.fuse_conv_bn = fuse_conv_bn
@@ -245,39 +247,40 @@ class ImperativeQuantAware: None Examples: - .. code-block:: python - - import paddle - from paddle.static.quantization \ - import ImperativeQuantAware - - class ImperativeModel(paddle.nn.Layer): - def __init__(self): - super().__init__() - # self.linear_0 would skip the quantization. - self.linear_0 = paddle.nn.Linear(784, 400) - self.linear_0.skip_quant = True - - # self.linear_1 would not skip the quantization. - self.linear_1 = paddle.nn.Linear(400, 10) - self.linear_1.skip_quant = False - - def forward(self, inputs): - x = self.linear_0(inputs) - x = self.linear_1(inputs) - return x - - model = ImperativeModel() - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') - - # Add the fake quant logical. - # The original model will be rewrite. - # - # There is only one Layer(self.linear1) would be added the - # fake quant logical. - imperative_qat.quantize(model) + .. code-block:: python + + >>> import paddle + >>> from paddle.static.quantization import ( + ... ImperativeQuantAware, + ... ) + + >>> class ImperativeModel(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... # self.linear_0 would skip the quantization. + ... self.linear_0 = paddle.nn.Linear(784, 400) + ... self.linear_0.skip_quant = True + ... + ... # self.linear_1 would not skip the quantization. + ... self.linear_1 = paddle.nn.Linear(400, 10) + ... self.linear_1.skip_quant = False + ... + ... def forward(self, inputs): + ... x = self.linear_0(inputs) + ... x = self.linear_1(inputs) + ... return x + + >>> model = ImperativeModel() + >>> imperative_qat = ImperativeQuantAware( + ... weight_quantize_type='abs_max', + ... activation_quantize_type='moving_average_abs_max') + + >>> # Add the fake quant logic. + >>> # The original model will be rewritten. + >>> # + >>> # Only one layer (self.linear_1) will have the fake quant + >>> # logic added. + >>> imperative_qat.quantize(model) """ assert isinstance( model, paddle.nn.Layer