diff --git a/demo/quant/quant_embedding/README.md b/demo/quant/quant_embedding/README.md
index 10d6f19857485bcfb85b7786c861fb136ceb536e..5667b19a7f27062dc508a68569ae9fb86d178b45 100755
--- a/demo/quant/quant_embedding/README.md
+++ b/demo/quant/quant_embedding/README.md
@@ -184,7 +184,7 @@
 step:1 2722 epoch:9 acc:0.153
 ```
 
-## 量化``基于skip-gram的word2vector模型``的训练流程保存的模型
+## 量化``基于skip-gram的word2vector模型``
 
 量化配置为:
 ```
@@ -236,3 +236,5 @@ quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quant
 
 step:1 2719 epoch:9 acc:0.153
 ```
+
+量化后的模型保存在``./output_quant``中,可看到量化后的参数``'emb.int8'``的大小为3.9M, 在``./v1_cpu5_b100_lr1dir``中可看到量化前的参数``'emb'``的大小为16M。
diff --git a/demo/quant/quant_embedding/infer.py b/demo/quant/quant_embedding/infer.py
index fa906bf922c9457cb3936ec9413548b529d37b20..40ae2ee8c639754d24a5474c5e58d7e062a1d4d0 100755
--- a/demo/quant/quant_embedding/infer.py
+++ b/demo/quant/quant_embedding/infer.py
@@ -82,6 +82,10 @@ def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
                 if args.emb_quant:
                     config = {'params_name': 'emb', 'quantize_type': 'abs_max'}
                     copy_program = quant_embedding(copy_program, place, config)
+                    fluid.io.save_persistables(
+                        exe,
+                        './output_quant/pass-' + str(epoch),
+                        main_program=copy_program)
                 accum_num = 0
                 accum_num_sum = 0.0