diff --git a/docs/en/quick_start/quant_aware_tutorial_en.md b/docs/en/quick_start/quant_aware_tutorial_en.md
index 8b169294ce1ae7bde64ed59035b40f7b2f588d0b..ada6e6ea78fb8630f14bf5c9eca86c1208352bfc 100644
--- a/docs/en/quick_start/quant_aware_tutorial_en.md
+++ b/docs/en/quick_start/quant_aware_tutorial_en.md
@@ -10,13 +10,14 @@ This tutorial shows how to do training-aware quantization using [API](https://pa
 6. Save model after quantization
 
 ## 1. Necessary imports
-PaddleSlim depends on Paddle1.7. Please make true that you have installed Paddle correctly. Then do the necessary imports:
+Please make sure that you have installed Paddle correctly. Then do the necessary imports:
 
 ```python
 import paddle
 import paddle.fluid as fluid
 import paddleslim as slim
 import numpy as np
+paddle.enable_static()
 ```
 
 ## 2. Model architecture
@@ -56,7 +57,7 @@ Define functions to train and test model. We only need call the functions when f
 def train(prog):
     iter = 0
     for data in train_reader():
-        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
+        acc1, acc5, loss, out = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
         if iter % 100 == 0:
             print('train iter={}, top1={}, top5={}, loss={}'.format(iter, acc1.mean(), acc5.mean(), loss.mean()))
         iter += 1
@@ -65,7 +66,7 @@ def test(prog):
     iter = 0
     res = [[], []]
     for data in train_reader():
-        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
+        acc1, acc5, loss, out = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
         if iter % 100 == 0:
             print('test iter={}, top1={}, top5={}, loss={}'.format(iter, acc1.mean(), acc5.mean(), loss.mean()))
         res[0].append(acc1.mean())
diff --git a/docs/en/quick_start/quant_post_static_tutorial_en.md b/docs/en/quick_start/quant_post_static_tutorial_en.md
index 0f7c5f414c2671aa97fe9189381358a76df7fba0..fd7f850875a8d9fdca155e5a6d70f1ffde49f7c5 100644
--- a/docs/en/quick_start/quant_post_static_tutorial_en.md
+++ b/docs/en/quick_start/quant_post_static_tutorial_en.md
@@ -16,6 +16,7 @@
 import paddle
 import paddle.fluid as fluid
 import paddleslim as slim
 import numpy as np
+paddle.enable_static()
 ```
 
 ## 2. Model architecture
@@ -39,10 +40,10 @@ To speed up training process, we select MNIST dataset to train image classificat
 ```python
 import paddle.dataset.mnist as reader
+paddle.enable_static()
 train_reader = paddle.fluid.io.batch(
         reader.train(), batch_size=128, drop_last=True)
 test_reader = paddle.fluid.io.batch(
-cs/en/quick_start/quant_aware_tutorial_en.md
         reader.train(), batch_size=128, drop_last=True)
 train_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())
 ```
@@ -55,7 +56,7 @@ Define functions to train and test model. We only need call the functions when f
 def train(prog):
     iter = 0
     for data in train_reader():
-        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
+        acc1, acc5, loss, out = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
         if iter % 100 == 0:
             print('train', acc1.mean(), acc5.mean(), loss.mean())
         iter += 1
@@ -64,7 +65,7 @@ def test(prog, outputs=outputs):
     iter = 0
     res = [[], []]
     for data in train_reader():
-        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
+        acc1, acc5, loss, out = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)
         if iter % 100 == 0:
             print('test', acc1.mean(), acc5.mean(), loss.mean())
         res[0].append(acc1.mean())
diff --git a/docs/zh_cn/api_cn/dygraph/quanter/qat.rst b/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
index 7042a28abe2a6cc0f11b37b497803ea2970dea8c..bc9364d4e957641180592608a927be89f8da1196 100644
--- a/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
+++ b/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
@@ -78,6 +78,7 @@ QAT
 
 .. code-block:: python
 
+    import paddle
     from paddle.vision.models import mobilenet_v1
     from paddleslim import QAT
     net = mobilenet_v1(pretrained=False)
@@ -86,7 +87,7 @@ QAT
         'quantizable_layer_type': ['Conv2D', 'Linear'],
     }
     quanter = QAT(config=quant_config)
-    quanter.quantize(lenet)
+    quanter.quantize(net)
     paddle.summary(net, (1, 3, 224, 224))
 
 ..
@@ -111,6 +112,7 @@ QAT
 
 .. code-block:: python
 
+    import paddle
     from paddle.vision.models import mobilenet_v1
     from paddleslim import QAT
     net = mobilenet_v1(pretrained=False)
@@ -119,7 +121,7 @@ QAT
         'quantizable_layer_type': ['Conv2D', 'Linear'],
     }
     quanter = QAT(config=quant_config)
-    quanter.quantize(lenet)
+    quanter.quantize(net)
     paddle.summary(net, (1, 3, 224, 224))
 
     quanter.save_quantized_model(
diff --git a/docs/zh_cn/api_cn/static/quant/quantization_api.rst b/docs/zh_cn/api_cn/static/quant/quantization_api.rst
index 2fc8a2fa163389543624e20b716d228b9c821e8a..1c445bc082579f1906f3bb3b346098cf0da680d6 100644
--- a/docs/zh_cn/api_cn/static/quant/quantization_api.rst
+++ b/docs/zh_cn/api_cn/static/quant/quantization_api.rst
@@ -78,10 +78,12 @@ quant_post_dynamic
 
 .. code-block:: python
 
+    import paddle
     import paddle.fluid as fluid
     import paddle.dataset.mnist as reader
     from paddleslim.quant import quant_post_dynamic
 
+    paddle.enable_static()
     quant_post_dynamic(
             model_dir='./model_path',
             save_model_dir='./save_path',
@@ -166,9 +168,11 @@ quant_post_static
 
 .. code-block:: python
 
+    import paddle
     import paddle.fluid as fluid
     import paddle.dataset.mnist as reader
     from paddleslim.quant import quant_post_static
+    paddle.enable_static()
 
     val_reader = reader.train()
     use_gpu = True
     place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
@@ -265,10 +269,11 @@ convert
 
 .. code-block:: python
 
     #encoding=utf8
+    import paddle
     import paddle.fluid as fluid
     import paddleslim.quant as quant
-
+    paddle.enable_static()
 
     train_program = fluid.Program()
     with fluid.program_guard(train_program):
@@ -398,9 +403,11 @@ fluid.Program
 
 .. code-block:: python
 
+    import paddle
     import paddle.fluid as fluid
     import paddleslim.quant as quant
 
+    paddle.enable_static()
     train_program = fluid.Program()
     with fluid.program_guard(train_program):
         input_word = fluid.data(name="input_word", shape=[None, 1], dtype='int64')
diff --git a/docs/zh_cn/quick_start/dygraph/dygraph_quant_post_tutorial.md b/docs/zh_cn/quick_start/dygraph/dygraph_quant_post_tutorial.md
index afd18b4c065c312702e090bb0a980839c17d5210..4281d0a6f9ab4cbaac797737038cdaffefc2a557 100644
--- a/docs/zh_cn/quick_start/dygraph/dygraph_quant_post_tutorial.md
+++ b/docs/zh_cn/quick_start/dygraph/dygraph_quant_post_tutorial.md
@@ -20,6 +20,7 @@
 
 ```python
 import paddle
+import paddleslim
 import paddle.vision.models as models
 from paddle.static import InputSpec as Input
 from paddle.vision.datasets import Cifar10
@@ -61,7 +62,7 @@ model.evaluate(val_dataset, batch_size=256, verbose=1)
 After training is done, export the inference model:
 
 ```python
-paddle.jit.save(net, "./fp32_inference_model", input_spec=[inputs])
+paddle.jit.save(net, "./fp32_inference_model", input_spec=inputs)
 ```
@@ -79,7 +80,7 @@ paddleslim.quant.quant_post_static(
         model_filename='fp32_inference_model.pdmodel',
         params_filename='fp32_inference_model.pdiparams',
         quantize_model_path='./quant_post_static_model',
-        sample_generator=train_dataset,
+        sample_generator=paddle.dataset.cifar.test10(),
         batch_nums=10)
 ```
diff --git a/docs/zh_cn/quick_start/static/quant_aware_tutorial.md b/docs/zh_cn/quick_start/static/quant_aware_tutorial.md
index 047a0156ccbacd2c75d4423b9b564f2125b90b8a..998e8d82590eef64ecd4b6b5e39875204575a434 100644
--- a/docs/zh_cn/quick_start/static/quant_aware_tutorial.md
+++ b/docs/zh_cn/quick_start/static/quant_aware_tutorial.md
@@ -168,7 +168,7 @@ paddle.static.save_inference_model(
         feed_vars=[image],
         fetch_vars=target_vars,
         executor=exe,
-        program=float_prog)
+        program=quant_infer_program)
 ```
 
 Depending on the business scenario, the quantized model can be deployed to mobile devices (ARM CPU) with PaddleLite, or to servers (NVIDIA GPU and Intel CPU) with PaddleInference.
diff --git a/docs/zh_cn/tutorials/quant/dygraph/quant_aware_training_tutorial.md b/docs/zh_cn/tutorials/quant/dygraph/quant_aware_training_tutorial.md
index 1725673c432bc41f52c2d05b14813c5949397f66..a9d10edcf257991b3ee2d504c88470d52d3ad9b7 100755
--- a/docs/zh_cn/tutorials/quant/dygraph/quant_aware_training_tutorial.md
+++ b/docs/zh_cn/tutorials/quant/dygraph/quant_aware_training_tutorial.md
@@ -39,7 +39,10 @@ quant_config = {
 Once the quantization configuration is settled, we can use it to convert a regular model into a simulated quantization model. The conversion is straightforward:
 
 ```python
+import paddle
 import paddleslim
+from paddle.vision.models import mobilenet_v1
+net = mobilenet_v1()
 quanter = paddleslim.QAT(config=quant_config)
 quanter.quantize(net)
 ```
@@ -54,10 +57,11 @@ quanter.quantize(net)
 
 ```python
 import paddleslim
+save_path = './model'
 quanter.save_quantized_model(
-    model,
+    net,
     save_path,
-    input_spec=[paddle.static.InputSpec()])
+    input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')])
 ```
 
 The quantized inference model can be opened with `netron` for visual inspection. Like a regular FP32 inference model, it can be loaded for inference with PaddleLite and PaddleInference; see the "Inference Deployment" (推理部署) chapter for details.
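As a cross-check on the dygraph QAT fixes above, here is a minimal end-to-end sketch that strings the corrected snippets together. It is an illustration, not part of the patch: the `quant_config` key mirrors the `qat.rst` example (unlisted keys fall back to PaddleSlim defaults), the fine-tuning loop between `quantize()` and export is elided, and `./model` is the save path used in the tutorial hunk.

```python
import paddle
import paddleslim
from paddle.vision.models import mobilenet_v1

# Config taken from the qat.rst example; keys not listed here use
# PaddleSlim's defaults.
quant_config = {
    'quantizable_layer_type': ['Conv2D', 'Linear'],
}

net = mobilenet_v1(pretrained=False)
quanter = paddleslim.QAT(config=quant_config)
quanter.quantize(net)                  # insert fake-quant ops into Conv2D/Linear layers
paddle.summary(net, (1, 3, 224, 224))  # sanity-check the quantized network

# ... fine-tune `net` here, as described in the tutorial ...

# Export a deployable quantized inference model; the InputSpec matches the
# shape fixed in quant_aware_training_tutorial.md.
quanter.save_quantized_model(
    net,
    './model',
    input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')])
```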