From 3563b4f5f678968d80d8e8a7c8a4c287674f64c7 Mon Sep 17 00:00:00 2001 From: wuzewu Date: Tue, 1 Dec 2020 15:31:13 +0800 Subject: [PATCH] Merge module from release/v1.8 --- .../colorization/deoldify/README.md | 121 + .../colorization/deoldify/base_module.py | 400 + .../colorization/deoldify/module.py | 165 + .../colorization/deoldify/resnet.py | 332 + .../colorization/deoldify/utils.py | 220 + .../colorization/photo_restoration/README.md | 98 + .../colorization/photo_restoration/module.py | 84 + .../colorization/photo_restoration/utils.py | 15 + .../super_resolution/dcscn/README.md | 134 + .../super_resolution/dcscn/data_feed.py | 61 + .../super_resolution/dcscn/module.py | 202 + .../super_resolution/dcscn/processor.py | 82 + .../super_resolution/falsr_a/README.md | 126 + .../super_resolution/falsr_a/data_feed.py | 60 + .../super_resolution/falsr_a/module.py | 195 + .../super_resolution/falsr_a/processor.py | 80 + .../super_resolution/falsr_b/README.md | 126 + .../super_resolution/falsr_b/data_feed.py | 60 + .../super_resolution/falsr_b/module.py | 195 + .../super_resolution/falsr_b/processor.py | 80 + .../super_resolution/falsr_c/README.md | 127 + .../super_resolution/falsr_c/data_feed.py | 60 + .../super_resolution/falsr_c/module.py | 198 + .../super_resolution/falsr_c/processor.py | 81 + .../super_resolution/realsr/README.md | 121 + .../super_resolution/realsr/module.py | 145 + .../super_resolution/realsr/rrdb.py | 137 + .../super_resolution/realsr/utils.py | 68 + modules/image/Image_gan/gan/README.md | 0 .../image/Image_gan/gan/stgan_bald/README.md | 80 + .../Image_gan/gan/stgan_bald/data_feed.py | 59 + .../image/Image_gan/gan/stgan_bald/module.py | 172 + .../Image_gan/gan/stgan_bald/module/__model__ | Bin 0 -> 358737 bytes .../Image_gan/gan/stgan_bald/processor.py | 81 + .../Image_gan/gan/stgan_bald/requirements.txt | 2 + .../style_transfer/UGATIT_100w/README.md | 122 + .../style_transfer/UGATIT_100w/model.py | 68 + .../style_transfer/UGATIT_100w/module.py | 57 + .../style_transfer/UGATIT_100w/processor.py | 119 + .../style_transfer/UGATIT_83w/README.md | 122 + .../style_transfer/UGATIT_83w/model.py | 68 + .../style_transfer/UGATIT_83w/module.py | 57 + .../style_transfer/UGATIT_83w/processor.py | 119 + .../style_transfer/UGATIT_92w/README.md | 122 + .../style_transfer/UGATIT_92w/model.py | 68 + .../style_transfer/UGATIT_92w/module.py | 57 + .../style_transfer/UGATIT_92w/processor.py | 119 + .../animegan_v1_hayao_60/README.md | 127 + .../animegan_v1_hayao_60/model.py | 68 + .../animegan_v1_hayao_60/module.py | 64 + .../animegan_v1_hayao_60/processor.py | 132 + .../animegan_v2_hayao_64/README.md | 127 + .../animegan_v2_hayao_64/model.py | 68 + .../animegan_v2_hayao_64/module.py | 64 + .../animegan_v2_hayao_64/processor.py | 132 + .../animegan_v2_hayao_99/README.md | 127 + .../animegan_v2_hayao_99/model.py | 68 + .../animegan_v2_hayao_99/module.py | 64 + .../animegan_v2_hayao_99/processor.py | 132 + .../animegan_v2_paprika_54/README.md | 127 + .../animegan_v2_paprika_54/model.py | 68 + .../animegan_v2_paprika_54/module.py | 64 + .../animegan_v2_paprika_54/processor.py | 132 + .../animegan_v2_paprika_74/README.md | 127 + .../animegan_v2_paprika_74/model.py | 68 + .../animegan_v2_paprika_74/module.py | 64 + .../animegan_v2_paprika_74/processor.py | 132 + .../animegan_v2_paprika_97/README.md | 127 + .../animegan_v2_paprika_97/model.py | 68 + .../animegan_v2_paprika_97/module.py | 64 + .../animegan_v2_paprika_97/processor.py | 132 + .../animegan_v2_paprika_98/README.md | 127 + 
.../animegan_v2_paprika_98/model.py | 68 + .../animegan_v2_paprika_98/module.py | 64 + .../animegan_v2_paprika_98/processor.py | 132 + .../animegan_v2_shinkai_33/README.md | 127 + .../animegan_v2_shinkai_33/model.py | 68 + .../animegan_v2_shinkai_33/module.py | 64 + .../animegan_v2_shinkai_33/processor.py | 132 + .../animegan_v2_shinkai_53/README.md | 127 + .../animegan_v2_shinkai_53/model.py | 68 + .../animegan_v2_shinkai_53/module.py | 64 + .../animegan_v2_shinkai_53/processor.py | 132 + .../stylepro_artistic/decoder_network.py | 279 +- .../stylepro_artistic/encoder_network.py | 324 +- .../stylepro_artistic/module.py | 76 +- .../hand_pose_localization/README.md | 112 + .../hand_pose_localization/model.py | 71 + .../hand_pose_localization/module.py | 50 + .../hand_pose_localization/processor.py | 129 + .../humanseg_lite/README.md | 205 + .../humanseg_lite/__init__.py | 0 .../humanseg_lite/data_feed.py | 63 + .../humanseg_lite/module.py | 396 + .../humanseg_lite/optimal.py | 103 + .../humanseg_lite/processor.py | 78 + .../humanseg_mobile/README.md | 208 + .../humanseg_mobile/__init__.py | 0 .../humanseg_mobile/data_feed.py | 62 + .../humanseg_mobile/module.py | 380 + .../humanseg_mobile/optimal.py | 104 + .../humanseg_mobile/processor.py | 78 + .../humanseg_server/README.md | 210 + .../humanseg_server/__init__.py | 0 .../humanseg_server/data_feed.py | 62 + .../humanseg_server/module.py | 368 + .../humanseg_server/optimal.py | 103 + .../humanseg_server/processor.py | 76 + .../chinese_ocr_db_crnn_mobile/README.md | 24 +- .../chinese_ocr_db_crnn_mobile/character.py | 42 +- .../chinese_ocr_db_crnn_mobile/module.py | 246 +- .../chinese_ocr_db_crnn_mobile/utils.py | 4 +- .../chinese_ocr_db_crnn_server/README.md | 21 +- .../chinese_ocr_db_crnn_server/character.py | 42 +- .../chinese_ocr_db_crnn_server/module.py | 239 +- .../chinese_ocr_db_crnn_server/utils.py | 4 +- .../README.md | 25 +- .../module.py | 55 +- .../processor.py | 71 +- .../README.md | 16 + .../module.py | 10 +- .../text/language_model/lda_news/document.py | 9 +- modules/text/language_model/lda_news/model.py | 1 + .../text/language_model/lda_news/module.py | 5 +- .../text/language_model/lda_news/tokenizer.py | 2 + modules/text/language_model/lda_news/util.py | 1 + .../language_model/lda_news/vose_alias.py | 1 + .../text/language_model/lda_novel/document.py | 9 +- .../text/language_model/lda_novel/model.py | 1 + .../text/language_model/lda_novel/module.py | 5 +- .../language_model/lda_novel/tokenizer.py | 2 + modules/text/language_model/lda_novel/util.py | 1 + .../language_model/lda_novel/vose_alias.py | 1 + .../language_model/lda_webpage/document.py | 9 +- .../text/language_model/lda_webpage/model.py | 1 + .../text/language_model/lda_webpage/module.py | 5 +- .../language_model/lda_webpage/tokenizer.py | 2 + .../text/language_model/lda_webpage/util.py | 1 + .../language_model/lda_webpage/vose_alias.py | 1 + .../text/language_model/slda_news/document.py | 9 +- .../text/language_model/slda_news/model.py | 1 + .../language_model/slda_news/tokenizer.py | 2 + modules/text/language_model/slda_news/util.py | 1 + .../language_model/slda_news/vose_alias.py | 1 + .../language_model/slda_novel/document.py | 9 +- .../text/language_model/slda_novel/model.py | 1 + .../language_model/slda_novel/tokenizer.py | 2 + .../text/language_model/slda_novel/util.py | 1 + .../language_model/slda_novel/vose_alias.py | 1 + .../language_model/slda_webpage/document.py | 9 +- .../text/language_model/slda_webpage/model.py | 1 + 
.../language_model/slda_webpage/tokenizer.py | 2 + .../text/language_model/slda_webpage/util.py | 1 + .../language_model/slda_webpage/vose_alias.py | 1 + .../language_model/slda_weibo/document.py | 9 +- .../text/language_model/slda_weibo/model.py | 1 + .../language_model/slda_weibo/tokenizer.py | 2 + .../text/language_model/slda_weibo/util.py | 1 + .../language_model/slda_weibo/vose_alias.py | 1 + .../text/text_generation/ernie_gen/README.md | 190 + .../text_generation/ernie_gen/__init__.py | 0 .../text/text_generation/ernie_gen/decode.py | 258 + .../text/text_generation/ernie_gen/module.py | 437 + .../ernie_gen/propeller/__init__.py | 44 + .../ernie_gen/propeller/data/__init__.py | 16 + .../ernie_gen/propeller/data/functional.py | 467 + .../ernie_gen/propeller/paddle/__init__.py | 51 + .../ernie_gen/propeller/paddle/collection.py | 61 + .../propeller/paddle/data/__init__.py | 22 + .../propeller/paddle/data/example.proto | 29 + .../propeller/paddle/data/example_pb2.py | 148 + .../propeller/paddle/data/feature.proto | 46 + .../propeller/paddle/data/feature_column.py | 436 + .../propeller/paddle/data/feature_pb2.py | 549 + .../propeller/paddle/data/functional.py | 66 + .../ernie_gen/propeller/paddle/summary.py | 37 + .../propeller/paddle/train/__init__.py | 33 + .../propeller/paddle/train/distribution.py | 159 + .../propeller/paddle/train/exporter.py | 154 + .../ernie_gen/propeller/paddle/train/hooks.py | 320 + .../propeller/paddle/train/metrics.py | 666 + .../paddle/train/monitored_executor.py | 434 + .../propeller/paddle/train/trainer.py | 466 + .../ernie_gen/propeller/service/__init__.py | 14 + .../ernie_gen/propeller/service/client.py | 101 + .../propeller/service/interface.proto | 46 + .../propeller/service/interface_pb2.py | 208 + .../ernie_gen/propeller/service/server.py | 182 + .../ernie_gen/propeller/service/utils.py | 116 + .../ernie_gen/propeller/tools/__init__.py | 13 + .../propeller/tools/ckpt_inspector.py | 116 + .../ernie_gen/propeller/tools/start_server.py | 39 + .../ernie_gen/propeller/train/__init__.py | 16 + .../ernie_gen/propeller/train/model.py | 88 + .../ernie_gen/propeller/types.py | 118 + .../ernie_gen/propeller/util.py | 126 + .../ernie_gen/template/__init__.py | 0 .../template/assets/ernie_config.json | 12 + .../ernie_gen/template/assets/vocab.txt | 17964 ++++++++++++++++ .../ernie_gen/template/model/decode.py | 259 + .../ernie_gen/template/model/file_utils.py | 46 + .../template/model/modeling_ernie.py | 327 + .../template/model/modeling_ernie_gen.py | 65 + .../template/model/tokenizing_ernie.py | 163 + .../ernie_gen/template/module.temp | 177 + .../ernie_gen/test_data/dev.txt | 6 + .../ernie_gen/test_data/train.txt | 24 + .../plato2_en_base/utils/tokenization.py | 19 +- .../plato2_en_large/utils/tokenization.py | 19 +- .../reading_pictures_writing_poems/README.md | 42 + .../__init__.py | 0 .../reading_pictures_writing_poems/module.py | 137 + .../MidAutumnDetection/__init__.py | 0 .../MidAutumnDetection/module.py | 123 + .../MidAutumnPoetry/__init__.py | 0 .../MidAutumnPoetry/model/decode.py | 259 + .../MidAutumnPoetry/model/file_utils.py | 46 + .../MidAutumnPoetry/model/modeling_ernie.py | 327 + .../model/modeling_ernie_gen.py | 65 + .../MidAutumnPoetry/model/tokenizing_ernie.py | 163 + .../MidAutumnPoetry/module.py | 162 + .../__init__.py | 0 .../module.py | 121 + 223 files changed, 39452 insertions(+), 579 deletions(-) create mode 100644 modules/image/Image_editing/colorization/deoldify/README.md create mode 100644 
modules/image/Image_editing/colorization/deoldify/base_module.py create mode 100644 modules/image/Image_editing/colorization/deoldify/module.py create mode 100644 modules/image/Image_editing/colorization/deoldify/resnet.py create mode 100644 modules/image/Image_editing/colorization/deoldify/utils.py create mode 100644 modules/image/Image_editing/colorization/photo_restoration/README.md create mode 100644 modules/image/Image_editing/colorization/photo_restoration/module.py create mode 100644 modules/image/Image_editing/colorization/photo_restoration/utils.py create mode 100644 modules/image/Image_editing/super_resolution/dcscn/README.md create mode 100644 modules/image/Image_editing/super_resolution/dcscn/data_feed.py create mode 100644 modules/image/Image_editing/super_resolution/dcscn/module.py create mode 100644 modules/image/Image_editing/super_resolution/dcscn/processor.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_a/README.md create mode 100644 modules/image/Image_editing/super_resolution/falsr_a/data_feed.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_a/module.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_a/processor.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_b/README.md create mode 100644 modules/image/Image_editing/super_resolution/falsr_b/data_feed.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_b/module.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_b/processor.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_c/README.md create mode 100644 modules/image/Image_editing/super_resolution/falsr_c/data_feed.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_c/module.py create mode 100644 modules/image/Image_editing/super_resolution/falsr_c/processor.py create mode 100644 modules/image/Image_editing/super_resolution/realsr/README.md create mode 100644 modules/image/Image_editing/super_resolution/realsr/module.py create mode 100644 modules/image/Image_editing/super_resolution/realsr/rrdb.py create mode 100644 modules/image/Image_editing/super_resolution/realsr/utils.py create mode 100644 modules/image/Image_gan/gan/README.md create mode 100644 modules/image/Image_gan/gan/stgan_bald/README.md create mode 100644 modules/image/Image_gan/gan/stgan_bald/data_feed.py create mode 100644 modules/image/Image_gan/gan/stgan_bald/module.py create mode 100644 modules/image/Image_gan/gan/stgan_bald/module/__model__ create mode 100644 modules/image/Image_gan/gan/stgan_bald/processor.py create mode 100644 modules/image/Image_gan/gan/stgan_bald/requirements.txt create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_100w/README.md create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_100w/model.py create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_100w/module.py create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_100w/processor.py create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_83w/README.md create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_83w/model.py create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_83w/module.py create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_83w/processor.py create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_92w/README.md create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_92w/model.py create mode 100644 
modules/image/Image_gan/style_transfer/UGATIT_92w/module.py create mode 100644 modules/image/Image_gan/style_transfer/UGATIT_92w/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py create mode 100644 modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py create mode 100644 modules/image/keypoint_detection/hand_pose_localization/README.md create mode 100644 
modules/image/keypoint_detection/hand_pose_localization/model.py create mode 100644 modules/image/keypoint_detection/hand_pose_localization/module.py create mode 100644 modules/image/keypoint_detection/hand_pose_localization/processor.py create mode 100644 modules/image/semantic_segmentation/humanseg_lite/README.md create mode 100644 modules/image/semantic_segmentation/humanseg_lite/__init__.py create mode 100644 modules/image/semantic_segmentation/humanseg_lite/data_feed.py create mode 100644 modules/image/semantic_segmentation/humanseg_lite/module.py create mode 100644 modules/image/semantic_segmentation/humanseg_lite/optimal.py create mode 100644 modules/image/semantic_segmentation/humanseg_lite/processor.py create mode 100644 modules/image/semantic_segmentation/humanseg_mobile/README.md create mode 100644 modules/image/semantic_segmentation/humanseg_mobile/__init__.py create mode 100644 modules/image/semantic_segmentation/humanseg_mobile/data_feed.py create mode 100644 modules/image/semantic_segmentation/humanseg_mobile/module.py create mode 100644 modules/image/semantic_segmentation/humanseg_mobile/optimal.py create mode 100644 modules/image/semantic_segmentation/humanseg_mobile/processor.py create mode 100644 modules/image/semantic_segmentation/humanseg_server/README.md create mode 100644 modules/image/semantic_segmentation/humanseg_server/__init__.py create mode 100644 modules/image/semantic_segmentation/humanseg_server/data_feed.py create mode 100644 modules/image/semantic_segmentation/humanseg_server/module.py create mode 100644 modules/image/semantic_segmentation/humanseg_server/optimal.py create mode 100644 modules/image/semantic_segmentation/humanseg_server/processor.py create mode 100644 modules/text/text_generation/ernie_gen/README.md create mode 100644 modules/text/text_generation/ernie_gen/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/decode.py create mode 100644 modules/text/text_generation/ernie_gen/module.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/data/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/data/functional.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/collection.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/data/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/data/example_pb2.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_pb2.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/data/functional.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/summary.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/train/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/train/distribution.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/train/exporter.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py create mode 100644 
modules/text/text_generation/ernie_gen/propeller/paddle/train/metrics.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/train/monitored_executor.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/service/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/service/client.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/service/interface.proto create mode 100644 modules/text/text_generation/ernie_gen/propeller/service/interface_pb2.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/service/server.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/service/utils.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/tools/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/tools/start_server.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/train/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/train/model.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/types.py create mode 100644 modules/text/text_generation/ernie_gen/propeller/util.py create mode 100644 modules/text/text_generation/ernie_gen/template/__init__.py create mode 100644 modules/text/text_generation/ernie_gen/template/assets/ernie_config.json create mode 100644 modules/text/text_generation/ernie_gen/template/assets/vocab.txt create mode 100644 modules/text/text_generation/ernie_gen/template/model/decode.py create mode 100644 modules/text/text_generation/ernie_gen/template/model/file_utils.py create mode 100644 modules/text/text_generation/ernie_gen/template/model/modeling_ernie.py create mode 100644 modules/text/text_generation/ernie_gen/template/model/modeling_ernie_gen.py create mode 100644 modules/text/text_generation/ernie_gen/template/model/tokenizing_ernie.py create mode 100644 modules/text/text_generation/ernie_gen/template/module.temp create mode 100644 modules/text/text_generation/ernie_gen/test_data/dev.txt create mode 100644 modules/text/text_generation/ernie_gen/test_data/train.txt create mode 100644 modules/text/text_generation/reading_pictures_writing_poems/README.md create mode 100644 modules/text/text_generation/reading_pictures_writing_poems/__init__.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems/module.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/__init__.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/__init__.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py create mode 100644 
modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/__init__.py create mode 100644 modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py diff --git a/modules/image/Image_editing/colorization/deoldify/README.md b/modules/image/Image_editing/colorization/deoldify/README.md new file mode 100644 index 00000000..21bfe6da --- /dev/null +++ b/modules/image/Image_editing/colorization/deoldify/README.md @@ -0,0 +1,121 @@ + +## 模型概述 +deoldify是用于图像和视频的着色渲染模型,该模型能够实现给黑白照片和视频恢复原彩。 + +## API 说明 + +```python +def predict(self, input): +``` + +着色变换API,得到着色后的图片或者视频。 + + +**参数** + +* input(str): 图片或者视频的路径; + +**返回** + +若输入是图片,返回值为: +* pred_img(np.ndarray): BGR图片数据; +* out_path(str): 保存图片路径。 + +若输入是视频,返回值为: +* frame_pattern_combined(str): 视频着色后单帧数据保存路径; +* vid_out_path(str): 视频保存路径。 + +```python +def run_image(self, img): +``` +图像着色API, 得到着色后的图片。 + +**参数** + +* img (str|np.ndarray): 图片路径或则BGR格式图片。 + +**返回** + +* pred_img(np.ndarray): BGR图片数据; + +```python +def run_video(self, video): +``` +视频着色API, 得到着色后的视频。 + +**参数** + +* video (str): 待处理视频路径。 + +**返回** + +* frame_pattern_combined(str): 视频着色后单帧数据保存路径; +* vid_out_path(str): 视频保存路径。 + +## 预测代码示例 + +```python +import paddlehub as hub + +model = hub.Module(name='deoldify') +model.predict('/PATH/TO/IMAGE/OR/VIDEO') +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线照片着色服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m deoldify +``` + +这样就完成了一个图像着色的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/ORIGIN/IMAGE') +data = {'images':cv2_to_base64(org_im)} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/deoldify" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +img = base64_to_cv2(r.json()["results"]) +cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/jantic/DeOldify + +### 依赖 + +paddlepaddle >= 2.0.0rc + +paddlehub >= 1.8.3 diff --git a/modules/image/Image_editing/colorization/deoldify/base_module.py b/modules/image/Image_editing/colorization/deoldify/base_module.py new file mode 100644 index 00000000..3c36d2d8 --- /dev/null +++ b/modules/image/Image_editing/colorization/deoldify/base_module.py @@ -0,0 +1,400 @@ +import paddle +import numpy as np +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.vision.models import resnet101 + +import deoldify.utils as U + + +class SequentialEx(nn.Layer): + "Like `nn.Sequential`, but with ModuleList semantics, and can access module input" + + def __init__(self, *layers): + super().__init__() + self.layers = nn.LayerList(layers) + + def forward(self, x): + res = x + for l in self.layers: + if isinstance(l, MergeLayer): + l.orig = x + nres = l(res) + # We have to 
remove res.orig to avoid hanging refs and therefore memory leaks + # l.orig = None + res = nres + return res + + def __getitem__(self, i): + return self.layers[i] + + def append(self, l): + return self.layers.append(l) + + def extend(self, l): + return self.layers.extend(l) + + def insert(self, i, l): + return self.layers.insert(i, l) + + +class Deoldify(SequentialEx): + def __init__(self, + encoder, + n_classes, + blur=False, + blur_final=True, + self_attention=False, + y_range=None, + last_cross=True, + bottle=False, + norm_type='Batch', + nf_factor=1, + **kwargs): + + imsize = (256, 256) + sfs_szs = U.model_sizes(encoder, size=imsize) + sfs_idxs = list(reversed(_get_sfs_idxs(sfs_szs))) + self.sfs = U.hook_outputs([encoder[i] for i in sfs_idxs], detach=False) + x = U.dummy_eval(encoder, imsize).detach() + + nf = 512 * nf_factor + extra_bn = norm_type == 'Spectral' + ni = sfs_szs[-1][1] + middle_conv = nn.Sequential( + custom_conv_layer(ni, ni * 2, norm_type=norm_type, extra_bn=extra_bn), + custom_conv_layer(ni * 2, ni, norm_type=norm_type, extra_bn=extra_bn), + ) + + layers = [encoder, nn.BatchNorm(ni), nn.ReLU(), middle_conv] + + for i, idx in enumerate(sfs_idxs): + not_final = i != len(sfs_idxs) - 1 + up_in_c, x_in_c = int(x.shape[1]), int(sfs_szs[idx][1]) + do_blur = blur and (not_final or blur_final) + sa = self_attention and (i == len(sfs_idxs) - 3) + + n_out = nf if not_final else nf // 2 + + unet_block = UnetBlockWide( + up_in_c, + x_in_c, + n_out, + self.sfs[i], + final_div=not_final, + blur=blur, + self_attention=sa, + norm_type=norm_type, + extra_bn=extra_bn, + **kwargs) + unet_block.eval() + layers.append(unet_block) + x = unet_block(x) + + ni = x.shape[1] + if imsize != sfs_szs[0][-2:]: + layers.append(PixelShuffle_ICNR(ni, **kwargs)) + if last_cross: + layers.append(MergeLayer(dense=True)) + ni += 3 + layers.append(res_block(ni, bottle=bottle, norm_type=norm_type, **kwargs)) + layers += [custom_conv_layer(ni, n_classes, ks=1, use_activ=False, norm_type=norm_type)] + if y_range is not None: + layers.append(SigmoidRange(*y_range)) + super().__init__(*layers) + + +def custom_conv_layer(ni: int, + nf: int, + ks: int = 3, + stride: int = 1, + padding: int = None, + bias: bool = None, + is_1d: bool = False, + norm_type='Batch', + use_activ: bool = True, + leaky: float = None, + transpose: bool = False, + self_attention: bool = False, + extra_bn: bool = False, + **kwargs): + "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers." + if padding is None: + padding = (ks - 1) // 2 if not transpose else 0 + bn = norm_type in ('Batch', 'Batchzero') or extra_bn == True + if bias is None: + bias = not bn + conv_func = nn.Conv2DTranspose if transpose else nn.Conv1d if is_1d else nn.Conv2D + + conv = conv_func(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding) + if norm_type == 'Weight': + conv = nn.utils.weight_norm(conv) + elif norm_type == 'Spectral': + conv = U.Spectralnorm(conv) + layers = [conv] + if use_activ: + layers.append(relu(True, leaky=leaky)) + if bn: + layers.append((nn.BatchNorm if is_1d else nn.BatchNorm)(nf)) + if self_attention: + layers.append(SelfAttention(nf)) + + return nn.Sequential(*layers) + + +def relu(inplace: bool = False, leaky: float = None): + "Return a relu activation, maybe `leaky` and `inplace`." + return nn.LeakyReLU(leaky) if leaky is not None else nn.ReLU() + + +class UnetBlockWide(nn.Layer): + "A quasi-UNet block, using `PixelShuffle_ICNR upsampling`." 
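+    # forward(): upsample `up_in` with CustomPixelShuffle_ICNR, batch-norm the encoder
+    # activation cached by `hook`, concatenate the two along the channel axis, then apply
+    # one custom conv layer (optionally followed by self-attention).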
+ + def __init__(self, + up_in_c: int, + x_in_c: int, + n_out: int, + hook, + final_div: bool = True, + blur: bool = False, + leaky: float = None, + self_attention: bool = False, + **kwargs): + super().__init__() + self.hook = hook + up_out = x_out = n_out // 2 + self.shuf = CustomPixelShuffle_ICNR(up_in_c, up_out, blur=blur, leaky=leaky, **kwargs) + self.bn = nn.BatchNorm(x_in_c) + ni = up_out + x_in_c + self.conv = custom_conv_layer(ni, x_out, leaky=leaky, self_attention=self_attention, **kwargs) + self.relu = relu(leaky=leaky) + + def forward(self, up_in): + s = self.hook.stored + up_out = self.shuf(up_in) + ssh = s.shape[-2:] + if ssh != up_out.shape[-2:]: + up_out = F.interpolate(up_out, s.shape[-2:], mode='nearest') + cat_x = self.relu(paddle.concat([up_out, self.bn(s)], axis=1)) + return self.conv(cat_x) + + +class UnetBlockDeep(nn.Layer): + "A quasi-UNet block, using `PixelShuffle_ICNR upsampling`." + + def __init__( + self, + up_in_c: int, + x_in_c: int, + # hook: Hook, + final_div: bool = True, + blur: bool = False, + leaky: float = None, + self_attention: bool = False, + nf_factor: float = 1.0, + **kwargs): + super().__init__() + + self.shuf = CustomPixelShuffle_ICNR(up_in_c, up_in_c // 2, blur=blur, leaky=leaky, **kwargs) + self.bn = nn.BatchNorm(x_in_c) + ni = up_in_c // 2 + x_in_c + nf = int((ni if final_div else ni // 2) * nf_factor) + self.conv1 = custom_conv_layer(ni, nf, leaky=leaky, **kwargs) + self.conv2 = custom_conv_layer(nf, nf, leaky=leaky, self_attention=self_attention, **kwargs) + self.relu = relu(leaky=leaky) + + def forward(self, up_in): + s = self.hook.stored + up_out = self.shuf(up_in) + ssh = s.shape[-2:] + if ssh != up_out.shape[-2:]: + up_out = F.interpolate(up_out, s.shape[-2:], mode='nearest') + cat_x = self.relu(paddle.concat([up_out, self.bn(s)], axis=1)) + return self.conv2(self.conv1(cat_x)) + + +def ifnone(a, b): + "`a` if `a` is not None, otherwise `b`." + return b if a is None else a + + +class PixelShuffle_ICNR(nn.Layer): + "Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, \ + `icnr` init, and `weight_norm`." + + def __init__(self, + ni: int, + nf: int = None, + scale: int = 2, + blur: bool = False, + norm_type='Weight', + leaky: float = None): + super().__init__() + nf = ifnone(nf, ni) + self.conv = conv_layer(ni, nf * (scale**2), ks=1, norm_type=norm_type, use_activ=False) + + self.shuf = PixelShuffle(scale) + + self.pad = ReplicationPad2d([1, 0, 1, 0]) + self.blur = nn.AvgPool2D(2, stride=1) + self.relu = relu(True, leaky=leaky) + + def forward(self, x): + x = self.shuf(self.relu(self.conv(x))) + return self.blur(self.pad(x)) if self.blur else x + + +def conv_layer(ni: int, + nf: int, + ks: int = 3, + stride: int = 1, + padding: int = None, + bias: bool = None, + is_1d: bool = False, + norm_type='Batch', + use_activ: bool = True, + leaky: float = None, + transpose: bool = False, + init=None, + self_attention: bool = False): + "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers." 
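+    # Padding defaults to (ks - 1) // 2 ("same"-style padding for odd kernels at stride 1);
+    # the conv bias is dropped whenever a BatchNorm follows, and norm_type 'Weight' /
+    # 'Spectral' wraps the conv in weight / spectral normalization instead of BatchNorm.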
+ if padding is None: padding = (ks - 1) // 2 if not transpose else 0 + bn = norm_type in ('Batch', 'BatchZero') + if bias is None: bias = not bn + conv_func = nn.Conv2DTranspose if transpose else nn.Conv1d if is_1d else nn.Conv2D + + conv = conv_func(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding) + if norm_type == 'Weight': + conv = nn.utils.weight_norm(conv) + elif norm_type == 'Spectral': + conv = U.Spectralnorm(conv) + + layers = [conv] + if use_activ: layers.append(relu(True, leaky=leaky)) + if bn: layers.append((nn.BatchNorm if is_1d else nn.BatchNorm)(nf)) + if self_attention: layers.append(SelfAttention(nf)) + return nn.Sequential(*layers) + + +class CustomPixelShuffle_ICNR(nn.Layer): + "Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, \ + and `weight_norm`." + + def __init__(self, ni: int, nf: int = None, scale: int = 2, blur: bool = False, leaky: float = None, **kwargs): + super().__init__() + nf = ifnone(nf, ni) + self.conv = custom_conv_layer(ni, nf * (scale**2), ks=1, use_activ=False, **kwargs) + + self.shuf = PixelShuffle(scale) + + self.pad = ReplicationPad2d([1, 0, 1, 0]) + self.blur = paddle.nn.AvgPool2D(2, stride=1) + self.relu = nn.LeakyReLU(leaky) if leaky is not None else nn.ReLU() # relu(True, leaky=leaky) + + def forward(self, x): + x = self.shuf(self.relu(self.conv(x))) + return self.blur(self.pad(x)) if self.blur else x + + +class MergeLayer(nn.Layer): + "Merge a shortcut with the result of the module by adding them or concatenating thme if `dense=True`." + + def __init__(self, dense: bool = False): + super().__init__() + self.dense = dense + self.orig = None + + def forward(self, x): + out = paddle.concat([x, self.orig], axis=1) if self.dense else (x + self.orig) + self.orig = None + return out + + +def res_block(nf, dense: bool = False, norm_type='Batch', bottle: bool = False, **conv_kwargs): + "Resnet block of `nf` features. `conv_kwargs` are passed to `conv_layer`." + norm2 = norm_type + if not dense and (norm_type == 'Batch'): norm2 = 'BatchZero' + nf_inner = nf // 2 if bottle else nf + return SequentialEx( + conv_layer(nf, nf_inner, norm_type=norm_type, **conv_kwargs), + conv_layer(nf_inner, nf, norm_type=norm2, **conv_kwargs), MergeLayer(dense)) + + +class SigmoidRange(nn.Layer): + "Sigmoid module with range `(low,x_max)`" + + def __init__(self, low, high): + super().__init__() + self.low, self.high = low, high + + def forward(self, x): + return sigmoid_range(x, self.low, self.high) + + +def sigmoid_range(x, low, high): + "Sigmoid function with range `(low, high)`" + return F.sigmoid(x) * (high - low) + low + + +class PixelShuffle(nn.Layer): + def __init__(self, upscale_factor): + super(PixelShuffle, self).__init__() + self.upscale_factor = upscale_factor + + def forward(self, x): + return F.pixel_shuffle(x, self.upscale_factor) + + +class ReplicationPad2d(nn.Layer): + def __init__(self, size): + super(ReplicationPad2d, self).__init__() + self.size = size + + def forward(self, x): + return F.pad(x, self.size, mode="replicate") + + +def conv1d(ni: int, no: int, ks: int = 1, stride: int = 1, padding: int = 0, bias: bool = False): + "Create and initialize a `nn.Conv1d` layer with spectral normalization." + conv = nn.Conv1D(ni, no, ks, stride=stride, padding=padding, bias_attr=bias) + return U.Spectralnorm(conv) + + +class SelfAttention(nn.Layer): + "Self attention layer for nd." 
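+    # Self-attention as in SAGAN (https://arxiv.org/abs/1805.08318): query/key/value are
+    # spectral-normalized 1x1 Conv1D projections, and `gamma` is a learned scalar
+    # initialized to zero, so the block starts out as an identity mapping.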
+ + def __init__(self, n_channels): + super().__init__() + self.query = conv1d(n_channels, n_channels // 8) + self.key = conv1d(n_channels, n_channels // 8) + self.value = conv1d(n_channels, n_channels) + self.gamma = self.create_parameter( + shape=[1], default_initializer=paddle.nn.initializer.Constant(0.0)) # nn.Parameter(tensor([0.])) + + def forward(self, x): + # Notation from https://arxiv.org/pdf/1805.08318.pdf + size = x.shape + x = paddle.reshape(x, list(size[:2]) + [-1]) + f, g, h = self.query(x), self.key(x), self.value(x) + + beta = paddle.nn.functional.softmax(paddle.bmm(paddle.transpose(f, [0, 2, 1]), g), axis=1) + o = self.gamma * paddle.bmm(h, beta) + x + return paddle.reshape(o, size) + + +def _get_sfs_idxs(sizes): + "Get the indexes of the layers where the size of the activation changes." + feature_szs = [size[-1] for size in sizes] + sfs_idxs = list(np.where(np.array(feature_szs[:-1]) != np.array(feature_szs[1:]))[0]) + if feature_szs[0] != feature_szs[1]: + sfs_idxs = [0] + sfs_idxs + return sfs_idxs + + +def build_model(): + backbone = resnet101() + cut = -2 + encoder = nn.Sequential(*list(backbone.children())[:cut]) + + model = Deoldify(encoder, 3, blur=True, y_range=(-3, 3), norm_type='Spectral', self_attention=True, nf_factor=2) + return model diff --git a/modules/image/Image_editing/colorization/deoldify/module.py b/modules/image/Image_editing/colorization/deoldify/module.py new file mode 100644 index 00000000..1cbe28f1 --- /dev/null +++ b/modules/image/Image_editing/colorization/deoldify/module.py @@ -0,0 +1,165 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
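+
+# PaddleHub module that wraps the DeOldify colorization network built by
+# `deoldify.base_module.build_model`, exposing `predict`, `run_image`, `run_video`
+# and a serving endpoint.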
+
+import os
+import glob
+
+import cv2
+import paddle
+import paddle.nn as nn
+import numpy as np
+from PIL import Image
+from tqdm import tqdm
+
+import deoldify.utils as U
+from paddlehub.module.module import moduleinfo, serving, Module
+from deoldify.base_module import build_model
+
+
+@moduleinfo(
+    name="deoldify",
+    type="CV/image_editing",
+    author="paddlepaddle",
+    author_email="",
+    summary="Deoldify is a colorization model",
+    version="1.0.0")
+class DeOldifyPredictor(Module):
+    def _initialize(self, render_factor: int = 32, output_path: str = 'result', load_checkpoint: str = None):
+        #super(DeOldifyPredictor, self).__init__()
+        self.model = build_model()
+        self.render_factor = render_factor
+        self.output = os.path.join(output_path, 'DeOldify')
+        if not os.path.exists(self.output):
+            os.makedirs(self.output)
+        if load_checkpoint is not None:
+            state_dict = paddle.load(load_checkpoint)
+            self.model.load_dict(state_dict)
+            print("load custom checkpoint success")
+
+        else:
+            checkpoint = os.path.join(self.directory, 'DeOldify_stable.pdparams')
+            state_dict = paddle.load(checkpoint)
+            self.model.load_dict(state_dict)
+            print("load pretrained checkpoint success")
+
+    def norm(self, img, render_factor=32, render_base=16):
+        target_size = render_factor * render_base
+        img = img.resize((target_size, target_size), resample=Image.BILINEAR)
+
+        img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0
+
+        img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+        img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+
+        img -= img_mean
+        img /= img_std
+        return img.astype('float32')
+
+    def denorm(self, img):
+        img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+        img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+
+        img *= img_std
+        img += img_mean
+        img = img.transpose((1, 2, 0))
+
+        return (img * 255).clip(0, 255).astype('uint8')
+
+    def post_process(self, raw_color, orig):
+        color_np = np.asarray(raw_color)
+        orig_np = np.asarray(orig)
+        color_yuv = cv2.cvtColor(color_np, cv2.COLOR_BGR2YUV)
+        orig_yuv = cv2.cvtColor(orig_np, cv2.COLOR_BGR2YUV)
+        hires = np.copy(orig_yuv)
+        hires[:, :, 1:3] = color_yuv[:, :, 1:3]
+        final = cv2.cvtColor(hires, cv2.COLOR_YUV2BGR)
+        return final
+
+    def run_image(self, img):
+        if isinstance(img, str):
+            ori_img = Image.open(img).convert('LA').convert('RGB')
+        elif isinstance(img, np.ndarray):
+            ori_img = Image.fromarray(img).convert('LA').convert('RGB')
+        elif isinstance(img, Image.Image):
+            ori_img = img
+
+        img = self.norm(ori_img, self.render_factor)
+        x = paddle.to_tensor(img[np.newaxis, ...])
+        out = self.model(x)
+
+        pred_img = self.denorm(out.numpy()[0])
+        pred_img = Image.fromarray(pred_img)
+        pred_img = pred_img.resize(ori_img.size, resample=Image.BILINEAR)
+        pred_img = self.post_process(pred_img, ori_img)
+        pred_img = cv2.cvtColor(pred_img, cv2.COLOR_RGB2BGR)
+        return pred_img
+
+    def run_video(self, video):
+        base_name = os.path.basename(video).split('.')[0]
+        output_path = os.path.join(self.output, base_name)
+        pred_frame_path = os.path.join(output_path, 'frames_pred')
+
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+
+        if not os.path.exists(pred_frame_path):
+            os.makedirs(pred_frame_path)
+
+        cap = cv2.VideoCapture(video)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+
+        out_path = U.video2frames(video, output_path)
+
+        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
+
+        for frame in tqdm(frames):
+            pred_img = self.run_image(frame)
+            pred_img = cv2.cvtColor(pred_img,
cv2.COLOR_BGR2RGB) + pred_img = Image.fromarray(pred_img) + frame_name = os.path.basename(frame) + pred_img.save(os.path.join(pred_frame_path, frame_name)) + + frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') + + vid_out_path = os.path.join(output_path, '{}_deoldify_out.mp4'.format(base_name)) + U.frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) + print('Save video result at {}.'.format(vid_out_path)) + + return frame_pattern_combined, vid_out_path + + def predict(self, input): + if not os.path.exists(self.output): + os.makedirs(self.output) + + if not U.is_image(input): + return self.run_video(input) + else: + pred_img = self.run_image(input) + + if self.output: + base_name = os.path.splitext(os.path.basename(input))[0] + out_path = os.path.join(self.output, base_name + '.png') + cv2.imwrite(out_path, pred_img) + return pred_img, out_path + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = U.base64_to_cv2(images) + results = self.run_image(img=images_decode) + results = U.cv2_to_base64(results) + return results diff --git a/modules/image/Image_editing/colorization/deoldify/resnet.py b/modules/image/Image_editing/colorization/deoldify/resnet.py new file mode 100644 index 00000000..46196c6b --- /dev/null +++ b/modules/image/Image_editing/colorization/deoldify/resnet.py @@ -0,0 +1,332 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
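+
+# Self-contained Paddle dygraph ResNet definitions (resnet18/34/50/101/152) with
+# optional ImageNet-pretrained weights from the paddle-hapi model zoo, mirroring
+# `paddle.vision.models.resnet`.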
+ +from __future__ import division +from __future__ import print_function + +import math +import paddle.fluid as fluid + +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear +from paddle.fluid.dygraph.container import Sequential + +from paddle.utils.download import get_weights_path_from_url + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'] + +model_urls = { + 'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams', '0ba53eea9bc970962d0ef96f7b94057e'), + 'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams', '46bc9f7c3dd2e55b7866285bee91eff3'), + 'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams', '5ce890a9ad386df17cf7fe2313dca0a1'), + 'resnet101': ('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams', 'fb07a451df331e4b0bb861ed97c3a9b9'), + 'resnet152': ('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams', 'f9c700f26d3644bb76ad2226ed5f5713'), +} + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, filter_size, stride=1, groups=1, act=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False) + + self._batch_norm = BatchNorm(num_filters, act=act) + + def forward(self, inputs): + x = self._conv(inputs) + x = self._batch_norm(x) + + return x + + +class BasicBlock(fluid.dygraph.Layer): + """residual block of resnet18 and resnet34 + """ + expansion = 1 + + def __init__(self, num_channels, num_filters, stride, shortcut=True): + super(BasicBlock, self).__init__() + + self.conv0 = ConvBNLayer(num_channels=num_channels, num_filters=num_filters, filter_size=3, act='relu') + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, act='relu') + + if not shortcut: + self.short = ConvBNLayer(num_channels=num_channels, num_filters=num_filters, filter_size=1, stride=stride) + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = short + conv1 + + return fluid.layers.relu(y) + + +class BottleneckBlock(fluid.dygraph.Layer): + """residual block of resnet50, resnet101 amd resnet152 + """ + + expansion = 4 + + def __init__(self, num_channels, num_filters, stride, shortcut=True): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer(num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu') + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, act='relu') + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * self.expansion, filter_size=1, act=None) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters * self.expansion, filter_size=1, stride=stride) + + self.shortcut = shortcut + + self._num_channels_out = num_filters * self.expansion + + def forward(self, inputs): + x = self.conv0(inputs) + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + x = fluid.layers.elementwise_add(x=short, y=conv2) + + return fluid.layers.relu(x) + + +class ResNet(fluid.dygraph.Layer): + """ResNet model from + `"Deep Residual Learning for Image 
Recognition" `_ + + Args: + Block (BasicBlock|BottleneckBlock): block module of model. + depth (int): layers of resnet, default: 50. + num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + will not be defined. Default: 1000. + with_pool (bool): use pool before the last fc layer or not. Default: True. + classifier_activation (str): activation for the last fc layer. Default: 'softmax'. + + Examples: + .. code-block:: python + + from paddle.vision.models import ResNet + from paddle.vision.models.resnet import BottleneckBlock, BasicBlock + + resnet50 = ResNet(BottleneckBlock, 50) + + resnet18 = ResNet(BasicBlock, 18) + + """ + + def __init__(self, Block, depth=50, num_classes=1000, with_pool=True, classifier_activation='softmax'): + super(ResNet, self).__init__() + + self.num_classes = num_classes + self.with_pool = with_pool + + layer_config = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + } + assert depth in layer_config.keys(), \ + "supported depth are {} but input layer is {}".format( + layer_config.keys(), depth) + + layers = layer_config[depth] + + in_channels = 64 + out_channels = [64, 128, 256, 512] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') + self.pool = Pool2D(pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + self.layers = [] + for idx, num_blocks in enumerate(layers): + blocks = [] + shortcut = False + for b in range(num_blocks): + if b == 1: + in_channels = out_channels[idx] * Block.expansion + block = Block( + num_channels=in_channels, + num_filters=out_channels[idx], + stride=2 if b == 0 and idx != 0 else 1, + shortcut=shortcut) + blocks.append(block) + shortcut = True + layer = self.add_sublayer("layer_{}".format(idx), Sequential(*blocks)) + self.layers.append(layer) + + if with_pool: + self.global_pool = Pool2D(pool_size=7, pool_type='avg', global_pooling=True) + + if num_classes > 0: + stdv = 1.0 / math.sqrt(out_channels[-1] * Block.expansion * 1.0) + self.fc_input_dim = out_channels[-1] * Block.expansion * 1 * 1 + self.fc = Linear( + self.fc_input_dim, + num_classes, + act=classifier_activation, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + + def forward(self, inputs): + x = self.conv(inputs) + x = self.pool(x) + for layer in self.layers: + x = layer(x) + + if self.with_pool: + x = self.global_pool(x) + + if self.num_classes > -1: + x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim]) + x = self.fc(x) + return x + + +def _resnet(arch, Block, depth, pretrained, **kwargs): + model = ResNet(Block, depth, **kwargs) + if pretrained: + assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( + arch) + weight_path = get_weights_path_from_url(model_urls[arch][0], model_urls[arch][1]) + assert weight_path.endswith('.pdparams'), "suffix of weight must be .pdparams" + param, _ = fluid.load_dygraph(weight_path) + model.set_dict(param) + + return model + + +def resnet18(pretrained=False, **kwargs): + """ResNet 18-layer model + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. 
code-block:: python + + from paddle.vision.models import resnet18 + + # build model + model = resnet18() + + # build model and load imagenet pretrained weight + # model = resnet18(pretrained=True) + """ + return _resnet('resnet18', BasicBlock, 18, pretrained, **kwargs) + + +def resnet34(pretrained=False, **kwargs): + """ResNet 34-layer model + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + from paddle.vision.models import resnet34 + + # build model + model = resnet34() + + # build model and load imagenet pretrained weight + # model = resnet34(pretrained=True) + """ + return _resnet('resnet34', BasicBlock, 34, pretrained, **kwargs) + + +def resnet50(pretrained=False, **kwargs): + """ResNet 50-layer model + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + from paddle.vision.models import resnet50 + + # build model + model = resnet50() + + # build model and load imagenet pretrained weight + # model = resnet50(pretrained=True) + """ + return _resnet('resnet50', BottleneckBlock, 50, pretrained, **kwargs) + + +def resnet101(pretrained=False, **kwargs): + """ResNet 101-layer model + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + from paddle.vision.models import resnet101 + + # build model + model = resnet101() + + # build model and load imagenet pretrained weight + # model = resnet101(pretrained=True) + """ + return _resnet('resnet101', BottleneckBlock, 101, pretrained, **kwargs) + + +def resnet152(pretrained=False, **kwargs): + """ResNet 152-layer model + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + from paddle.vision.models import resnet152 + + # build model + model = resnet152() + + # build model and load imagenet pretrained weight + # model = resnet152(pretrained=True) + """ + return _resnet('resnet152', BottleneckBlock, 152, pretrained, **kwargs) diff --git a/modules/image/Image_editing/colorization/deoldify/utils.py b/modules/image/Image_editing/colorization/deoldify/utils.py new file mode 100644 index 00000000..a9969153 --- /dev/null +++ b/modules/image/Image_editing/colorization/deoldify/utils.py @@ -0,0 +1,220 @@ +import os +import sys +import base64 + +import cv2 +import numpy as np +import paddle +import paddle.nn as nn +from PIL import Image + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def is_listy(x): + return isinstance(x, (tuple, list)) + + +class Hook(): + "Create a hook on `m` with `hook_func`." + + def __init__(self, m, hook_func, is_forward=True, detach=True): + self.hook_func, self.detach, self.stored = hook_func, detach, None + f = m.register_forward_post_hook if is_forward else m.register_backward_hook + self.hook = f(self.hook_fn) + self.removed = False + + def hook_fn(self, module, input, output): + "Applies `hook_func` to `module`, `input`, `output`." 
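+        # With `detach=True`, inputs and outputs are detached from the autograd graph
+        # before being passed to `hook_func`; the result is cached in `self.stored`.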
+ if self.detach: + input = (o.detach() for o in input) if is_listy(input) else input.detach() + output = (o.detach() for o in output) if is_listy(output) else output.detach() + self.stored = self.hook_func(module, input, output) + + def remove(self): + "Remove the hook from the model." + if not self.removed: + self.hook.remove() + self.removed = True + + def __enter__(self, *args): + return self + + def __exit__(self, *args): + self.remove() + + +class Hooks(): + "Create several hooks on the modules in `ms` with `hook_func`." + + def __init__(self, ms, hook_func, is_forward=True, detach=True): + self.hooks = [] + try: + for m in ms: + self.hooks.append(Hook(m, hook_func, is_forward, detach)) + except Exception as e: + pass + + def __getitem__(self, i: int) -> Hook: + return self.hooks[i] + + def __len__(self) -> int: + return len(self.hooks) + + def __iter__(self): + return iter(self.hooks) + + @property + def stored(self): + return [o.stored for o in self] + + def remove(self): + "Remove the hooks from the model." + for h in self.hooks: + h.remove() + + def __enter__(self, *args): + return self + + def __exit__(self, *args): + self.remove() + + +def _hook_inner(m, i, o): + return o if isinstance(o, paddle.fluid.framework.Variable) else o if is_listy(o) else list(o) + + +def hook_output(module, detach=True, grad=False): + "Return a `Hook` that stores activations of `module` in `self.stored`" + return Hook(module, _hook_inner, detach=detach, is_forward=not grad) + + +def hook_outputs(modules, detach=True, grad=False): + "Return `Hooks` that store activations of all `modules` in `self.stored`" + return Hooks(modules, _hook_inner, detach=detach, is_forward=not grad) + + +def model_sizes(m, size=(64, 64)): + "Pass a dummy input through the model `m` to get the various sizes of activations." + with hook_outputs(m) as hooks: + x = dummy_eval(m, size) + return [o.stored.shape for o in hooks] + + +def dummy_eval(m, size=(64, 64)): + "Pass a `dummy_batch` in evaluation mode in `m` with `size`." + m.eval() + return m(dummy_batch(size)) + + +def dummy_batch(size=(64, 64), ch_in=3): + "Create a dummy batch to go through `m` with `size`." 
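+    # Random NCHW batch of shape (1, ch_in, *size), rescaled from [0, 1) to [-1, 1).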
+ arr = np.random.rand(1, ch_in, *size).astype('float32') * 2 - 1 + return paddle.to_tensor(arr) + + +class _SpectralNorm(nn.SpectralNorm): + def __init__(self, weight_shape, dim=0, power_iters=1, eps=1e-12, dtype='float32'): + super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype) + + def forward(self, weight): + inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v} + out = self._helper.create_variable_for_type_inference(self._dtype) + _power_iters = self._power_iters if self.training else 0 + self._helper.append_op( + type="spectral_norm", + inputs=inputs, + outputs={ + "Out": out, + }, + attrs={ + "dim": self._dim, + "power_iters": _power_iters, + "eps": self._eps, + }) + + return out + + +class Spectralnorm(paddle.nn.Layer): + def __init__(self, layer, dim=0, power_iters=1, eps=1e-12, dtype='float32'): + super(Spectralnorm, self).__init__() + self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype) + self.dim = dim + self.power_iters = power_iters + self.eps = eps + self.layer = layer + weight = layer._parameters['weight'] + del layer._parameters['weight'] + self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype) + self.weight_orig.set_value(weight) + + def forward(self, x): + weight = self.spectral_norm(self.weight_orig) + self.layer.weight = weight + out = self.layer(x) + return out + + +def video2frames(video_path, outpath, **kargs): + def _dict2str(kargs): + cmd_str = '' + for k, v in kargs.items(): + cmd_str += (' ' + str(k) + ' ' + str(v)) + return cmd_str + + ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] + vid_name = video_path.split('/')[-1].split('.')[0] + out_full_path = os.path.join(outpath, vid_name) + + if not os.path.exists(out_full_path): + os.makedirs(out_full_path) + + # video file name + outformat = out_full_path + '/%08d.png' + + cmd = ffmpeg + cmd = ffmpeg + [' -i ', video_path, ' -start_number ', ' 0 ', outformat] + + cmd = ''.join(cmd) + _dict2str(kargs) + + if os.system(cmd) != 0: + raise RuntimeError('ffmpeg process video: {} error'.format(vid_name)) + + sys.stdout.flush() + return out_full_path + + +def frames2video(frame_path, video_path, r): + + ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] + cmd = ffmpeg + [ + ' -r ', r, ' -f ', ' image2 ', ' -i ', frame_path, ' -vcodec ', ' libx264 ', ' -pix_fmt ', ' yuv420p ', + ' -crf ', ' 16 ', video_path + ] + cmd = ''.join(cmd) + + if os.system(cmd) != 0: + raise RuntimeError('ffmpeg process video: {} error'.format(video_path)) + + sys.stdout.flush() + + +def is_image(input): + try: + img = Image.open(input) + _ = img.size + + return True + except: + return False diff --git a/modules/image/Image_editing/colorization/photo_restoration/README.md b/modules/image/Image_editing/colorization/photo_restoration/README.md new file mode 100644 index 00000000..653b313c --- /dev/null +++ b/modules/image/Image_editing/colorization/photo_restoration/README.md @@ -0,0 +1,98 @@ +## 模型概述 + +photo_restoration 是针对老照片修复的模型。它主要由两个部分组成:着色和超分。着色模型基于deoldify +,超分模型基于realsr. 
用户可以根据自己的需求选择对图像进行着色或超分操作。因此在使用该模型时,请预先安装deoldify和realsr两个模型。 + + +## API + +```python +def run_image(self, + input, + model_select= ['Colorization', 'SuperResolution'], + save_path = 'photo_restoration'): +``` + +预测API,用于图片修复。 + +**参数** + +* input (numpy.ndarray|str): 图片数据,numpy.ndarray 或者 str形式。ndarray.shape 为 \[H, W, C\],BGR格式; str为图片的路径。 + +* model_select (list\[str\]): 选择对图片对操作,\['Colorization'\]对图像只进行着色操作, \['SuperResolution'\]对图像只进行超分操作; +默认值为\['Colorization', 'SuperResolution'\]。 + +* save_path (str): 保存图片的路径, 默认为'photo_restoration'。 + +**返回** + +* output (numpy.ndarray): 照片修复结果,ndarray.shape 为 \[H, W, C\],BGR格式。 + + + +## 代码示例 + +图片修复代码示例: + +```python +import cv2 +import paddlehub as hub + +model = hub.Module(name='photo_restoration', visualization=True) +im = cv2.imread('/PATH/TO/IMAGE') +res = model.run_image(im) + +``` + +## 服务部署 + +PaddleHub Serving可以部署一个照片修复的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m photo_restoration +``` + +这样就完成了一个照片修复的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('PATH/TO/IMAGE') +data = {'images':cv2_to_base64(org_im), 'model_select': ['Colorization', 'SuperResolution']} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/photo_restoration" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +img = base64_to_cv2(r.json()["results"]) +cv2.imwrite('PATH/TO/SAVE/IMAGE', img) +``` + +### 依赖 + +paddlepaddle >= 2.0.0rc + +paddlehub >= 1.8.2 diff --git a/modules/image/Image_editing/colorization/photo_restoration/module.py b/modules/image/Image_editing/colorization/photo_restoration/module.py new file mode 100644 index 00000000..83903748 --- /dev/null +++ b/modules/image/Image_editing/colorization/photo_restoration/module.py @@ -0,0 +1,84 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import time + +import cv2 +import paddle.nn as nn +import paddlehub as hub +from paddlehub.module.module import moduleinfo, serving, Module + +import photo_restoration.utils as U + + +@moduleinfo( + name="photo_restoration", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="photo_restoration is a photo restoration model based on deoldify and realsr.", + version="1.0.0") +class PhotoRestoreModel(Module): + """ + PhotoRestoreModel + + Args: + load_checkpoint(str): Checkpoint save path, default is None. 
+ visualization (bool): Whether to save the estimation result. Default is True. + """ + + def _initialize(self, visualization: bool = False): + #super(PhotoRestoreModel, self).__init__() + self.deoldify = hub.Module(name='deoldify') + self.realsr = hub.Module(name='realsr') + self.visualization = visualization + + def run_image(self, + input, + model_select: list = ['Colorization', 'SuperResolution'], + save_path: str = 'photo_restoration'): + self.models = [] + for model in model_select: + print('\n {} model proccess start..'.format(model)) + if model == 'Colorization': + self.deoldify.eval() + self.models.append(self.deoldify) + if model == 'SuperResolution': + self.realsr.eval() + self.models.append(self.realsr) + + for model in self.models: + output = model.run_image(input) + input = output + if self.visualization: + if not os.path.exists(save_path): + os.mkdir(save_path) + img_name = str(time.time()) + '.png' + save_img = os.path.join(save_path, img_name) + cv2.imwrite(save_img, output) + print("save result at: ", save_img) + + return output + + @serving + def serving_method(self, images, model_select): + """ + Run as a service. + """ + print(model_select) + images_decode = U.base64_to_cv2(images) + results = self.run_image(input=images_decode, model_select=model_select) + results = U.cv2_to_base64(results) + return results diff --git a/modules/image/Image_editing/colorization/photo_restoration/utils.py b/modules/image/Image_editing/colorization/photo_restoration/utils.py new file mode 100644 index 00000000..c3756451 --- /dev/null +++ b/modules/image/Image_editing/colorization/photo_restoration/utils.py @@ -0,0 +1,15 @@ +import base64 +import cv2 +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_editing/super_resolution/dcscn/README.md b/modules/image/Image_editing/super_resolution/dcscn/README.md new file mode 100644 index 00000000..da9bfa44 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/dcscn/README.md @@ -0,0 +1,134 @@ +## 模型概述 + +DCSCN是基于Fast and Accurate Image Super Resolution by Deep CNN with Skip Connection and Network in Network设计的轻量化超分辨模型。该模型使用残差结构和跳连的方式构建网络来提取局部和全局特征,同时使用并行1*1的卷积网络学习细节特征提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run dcscn --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="dcscn_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='dcscn_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + 
+```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module(name='dcscn') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m dcscn +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 + +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/dcscn" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +sr = np.expand_dims(cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY), axis=2) +shape =sr.shape +org_im = cv2.cvtColor(org_im, cv2.COLOR_BGR2YUV) +uv = cv2.resize(org_im[...,1:], (shape[1], shape[0]), interpolation=cv2.INTER_CUBIC) +combine_im = cv2.cvtColor(np.concatenate((sr, uv), axis=2), cv2.COLOR_YUV2BGR) +cv2.imwrite('dcscn_X2.png', combine_im) +print("save image as dcscn_X2.png") + +``` +### 查看代码 + +https://github.com/jiny2001/dcscn-super-resolution + + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/modules/image/Image_editing/super_resolution/dcscn/data_feed.py b/modules/image/Image_editing/super_resolution/dcscn/data_feed.py new file mode 100644 index 00000000..10eeba2e --- /dev/null +++ b/modules/image/Image_editing/super_resolution/dcscn/data_feed.py @@ -0,0 +1,61 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." 
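+        # ndarray inputs are cast to float32 and tagged with a timestamp-based
+        # pseudo path, since no file path is available for them.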
+ for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_x = np.expand_dims(img[:, :, 0], axis=2) + img_x2 = np.expand_dims(cv2.resize(img_x, (shape[1] * 2, shape[0] * 2), interpolation=cv2.INTER_CUBIC), axis=2) + img_x = img_x.transpose((2, 0, 1)) / 255 + img_x2 = img_x2.transpose(2, 0, 1) / 255 + img_x = img_x.astype(np.float32) + img_x2 = img_x2.astype(np.float32) + element['img_x'] = img_x + element['img_x2'] = img_x2 + yield element + + +if __name__ == "__main__": + path = ['photo.jpg'] + reader(paths=path) diff --git a/modules/image/Image_editing/super_resolution/dcscn/module.py b/modules/image/Image_editing/super_resolution/dcscn/module.py new file mode 100644 index 00000000..96b2715b --- /dev/null +++ b/modules/image/Image_editing/super_resolution/dcscn/module.py @@ -0,0 +1,202 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from dcscn.data_feed import reader +from dcscn.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="dcscn", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="dcscn is a super resolution model.", + version="1.0.0") +class Dcscn(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "dcscn_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=False, output_dir="dcscn_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. 
+ output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_x = np.array([all_data[i]['img_x']]) + image_x2 = np.array([all_data[i]['img_x2']]) + dropout = np.array([0]) + image_x = PaddleTensor(image_x.copy()) + image_x2 = PaddleTensor(image_x2.copy()) + drop_out = PaddleTensor(dropout.copy()) + output = self.gpu_predictor.run([image_x, image_x2]) if use_gpu else self.cpu_predictor.run( + [image_x, image_x2]) + + output = np.expand_dims(output[0].as_ndarray(), axis=1) + + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='dcscn_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
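+        Boolean options (--use_gpu, --visualization) are parsed with ast.literal_eval.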
+ """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='dcscn_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', type=str, default='dcscn_save_model', help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Dcscn() + #module.reconstruct(paths=["BSD100_001.png","BSD100_002.png"]) + import cv2 + img = cv2.imread("BSD100_001.png").astype('float32') + res = module.reconstruct(images=[img]) + module.save_inference_model() diff --git a/modules/image/Image_editing/super_resolution/dcscn/processor.py b/modules/image/Image_editing/super_resolution/dcscn/processor.py new file mode 100644 index 00000000..04ac460e --- /dev/null +++ b/modules/image/Image_editing/super_resolution/dcscn/processor.py @@ -0,0 +1,82 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + shape = sr.shape + if visualization: + org_im = cv2.cvtColor(org_im, cv2.COLOR_BGR2YUV) + uv = cv2.resize(org_im[..., 1:], (shape[1], shape[0]), interpolation=cv2.INTER_CUBIC) + combine_im = cv2.cvtColor(np.concatenate((sr, uv), axis=2), cv2.COLOR_YUV2BGR) + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, combine_im) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/Image_editing/super_resolution/falsr_a/README.md b/modules/image/Image_editing/super_resolution/falsr_a/README.md new file mode 100644 index 00000000..2981753c --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_a/README.md @@ -0,0 +1,126 @@ +## 模型概述 + +falsr_a是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run falsr_a --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_a_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='falsr_a_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module(name='falsr_a') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m falsr_a +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/falsr_a" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +sr = base64_to_cv2(r.json()["results"][0]['data']) +cv2.imwrite('falsr_a_X2.png', sr) +print("save image as falsr_a_X2.png") +``` +### 查看代码 + +https://github.com/xiaomi-automl/FALSR + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/modules/image/Image_editing/super_resolution/falsr_a/data_feed.py 
b/modules/image/Image_editing/super_resolution/falsr_a/data_feed.py new file mode 100644 index 00000000..8aa6514b --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_a/data_feed.py @@ -0,0 +1,60 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_scale = cv2.resize(img, (shape[1] * 2, shape[0] * 2), interpolation=cv2.INTER_CUBIC) + img_y = np.expand_dims(img[:, :, 0], axis=2) + img_scale_pbpr = img_scale[..., 1:] + img_y = img_y.transpose((2, 0, 1)) / 255 + img_scale_pbpr = img_scale_pbpr.transpose(2, 0, 1) / 255 + element['img_y'] = img_y + element['img_scale_pbpr'] = img_scale_pbpr + yield element + + +if __name__ == "__main__": + path = ['BSD100_001.png'] + reader(paths=path) diff --git a/modules/image/Image_editing/super_resolution/falsr_a/module.py b/modules/image/Image_editing/super_resolution/falsr_a/module.py new file mode 100644 index 00000000..5d1d1843 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_a/module.py @@ -0,0 +1,195 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from falsr_a.data_feed import reader +from falsr_a.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="falsr_a", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="falsr_a is a super resolution model.", + version="1.0.0") +class Falsr_A(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "falsr_a_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=False, output_dir="falsr_a_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_y = np.array([all_data[i]['img_y']]) + image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) + image_y = PaddleTensor(image_y.copy()) + image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) + output = self.gpu_predictor.run([image_y, image_scale_pbpr]) if use_gpu else self.cpu_predictor.run( + [image_y, image_scale_pbpr]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='falsr_a_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='falsr_a_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', type=str, default='falsr_a_save_model', help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Falsr_A() + module.reconstruct(paths=["BSD100_001.png", "BSD100_002.png", "Set5_003.png"]) + module.save_inference_model() diff --git a/modules/image/Image_editing/super_resolution/falsr_a/processor.py b/modules/image/Image_editing/super_resolution/falsr_a/processor.py new file mode 100644 index 00000000..805ada4d --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_a/processor.py @@ -0,0 +1,80 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + sr = cv2.cvtColor(sr, cv2.COLOR_RGB2BGR) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, sr) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/Image_editing/super_resolution/falsr_b/README.md b/modules/image/Image_editing/super_resolution/falsr_b/README.md new file mode 100644 index 00000000..f54f159d --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_b/README.md @@ -0,0 +1,126 @@ +## 模型概述 + +falsr_b是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。falsr_b较falsr_a更轻量化。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run falsr_b --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=True, + output_dir="falsr_b_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='falsr_b_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module(name='falsr_b') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m falsr_b +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/falsr_b" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +sr = base64_to_cv2(r.json()["results"][0]['data']) +cv2.imwrite('falsr_b_X2.png', sr) +print("save image as falsr_b_X2.png") +``` + +### 查看代码 + +https://github.com/xiaomi-automl/FALSR + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/modules/image/Image_editing/super_resolution/falsr_b/data_feed.py 
b/modules/image/Image_editing/super_resolution/falsr_b/data_feed.py new file mode 100644 index 00000000..8aa6514b --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_b/data_feed.py @@ -0,0 +1,60 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_scale = cv2.resize(img, (shape[1] * 2, shape[0] * 2), interpolation=cv2.INTER_CUBIC) + img_y = np.expand_dims(img[:, :, 0], axis=2) + img_scale_pbpr = img_scale[..., 1:] + img_y = img_y.transpose((2, 0, 1)) / 255 + img_scale_pbpr = img_scale_pbpr.transpose(2, 0, 1) / 255 + element['img_y'] = img_y + element['img_scale_pbpr'] = img_scale_pbpr + yield element + + +if __name__ == "__main__": + path = ['BSD100_001.png'] + reader(paths=path) diff --git a/modules/image/Image_editing/super_resolution/falsr_b/module.py b/modules/image/Image_editing/super_resolution/falsr_b/module.py new file mode 100644 index 00000000..b5db9e5e --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_b/module.py @@ -0,0 +1,195 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from falsr_b.data_feed import reader +from falsr_b.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="falsr_b", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="falsr_b is a super resolution model.", + version="1.0.0") +class Falsr_B(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "falsr_b_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=False, output_dir="falsr_b_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_y = np.array([all_data[i]['img_y']]) + image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) + image_y = PaddleTensor(image_y.copy()) + image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) + output = self.gpu_predictor.run([image_y, image_scale_pbpr]) if use_gpu else self.cpu_predictor.run( + [image_y, image_scale_pbpr]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='falsr_b_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='falsr_b_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', type=str, default='falsr_b_save_model', help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Falsr_B() + module.reconstruct(paths=["BSD100_001.png", "BSD100_002.png", "Set5_003.png"]) + module.save_inference_model() diff --git a/modules/image/Image_editing/super_resolution/falsr_b/processor.py b/modules/image/Image_editing/super_resolution/falsr_b/processor.py new file mode 100644 index 00000000..805ada4d --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_b/processor.py @@ -0,0 +1,80 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + sr = cv2.cvtColor(sr, cv2.COLOR_RGB2BGR) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, sr) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/Image_editing/super_resolution/falsr_c/README.md b/modules/image/Image_editing/super_resolution/falsr_c/README.md new file mode 100644 index 00000000..c61b2ed4 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_c/README.md @@ -0,0 +1,127 @@ +## 模型概述 + +falsr_c是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run falsr_c --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_c_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='falsr_c_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module(name='falsr_c') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m falsr_c +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/falsr_c" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +sr = base64_to_cv2(r.json()["results"][0]['data']) +cv2.imwrite('falsr_c_X2.png', sr) +print("save image as falsr_c_X2.png") +``` + +### 查看代码 + +https://github.com/xiaomi-automl/FALSR + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/modules/image/Image_editing/super_resolution/falsr_c/data_feed.py 
b/modules/image/Image_editing/super_resolution/falsr_c/data_feed.py new file mode 100644 index 00000000..8aa6514b --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_c/data_feed.py @@ -0,0 +1,60 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_scale = cv2.resize(img, (shape[1] * 2, shape[0] * 2), interpolation=cv2.INTER_CUBIC) + img_y = np.expand_dims(img[:, :, 0], axis=2) + img_scale_pbpr = img_scale[..., 1:] + img_y = img_y.transpose((2, 0, 1)) / 255 + img_scale_pbpr = img_scale_pbpr.transpose(2, 0, 1) / 255 + element['img_y'] = img_y + element['img_scale_pbpr'] = img_scale_pbpr + yield element + + +if __name__ == "__main__": + path = ['BSD100_001.png'] + reader(paths=path) diff --git a/modules/image/Image_editing/super_resolution/falsr_c/module.py b/modules/image/Image_editing/super_resolution/falsr_c/module.py new file mode 100644 index 00000000..8a8f2599 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_c/module.py @@ -0,0 +1,198 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from falsr_c.data_feed import reader +from falsr_c.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="falsr_c", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="falsr_c is a super resolution model.", + version="1.0.0") +class Falsr_C(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "falsr_c_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=False, output_dir="falsr_c_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_y = np.array([all_data[i]['img_y']]) + image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) + image_y = PaddleTensor(image_y.copy()) + image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) + output = self.gpu_predictor.run([image_y, image_scale_pbpr]) if use_gpu else self.cpu_predictor.run( + [image_y, image_scale_pbpr]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='falsr_c_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='falsr_c_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', type=str, default='falsr_c_save_model', help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Falsr_C() + #module.reconstruct(paths=["BSD100_001.png","BSD100_002.png", "Set5_003.png"]) + import cv2 + img = cv2.imread("BSD100_001.png").astype('float32') + res = module.reconstruct(images=[img]) + module.save_inference_model() diff --git a/modules/image/Image_editing/super_resolution/falsr_c/processor.py b/modules/image/Image_editing/super_resolution/falsr_c/processor.py new file mode 100644 index 00000000..fe451116 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_c/processor.py @@ -0,0 +1,81 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + sr = cv2.cvtColor(sr, cv2.COLOR_RGB2BGR) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, sr) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + print("result['data'] shape", result['data'].shape) + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/Image_editing/super_resolution/realsr/README.md b/modules/image/Image_editing/super_resolution/realsr/README.md new file mode 100644 index 00000000..0ca1f879 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/realsr/README.md @@ -0,0 +1,121 @@ + +## 模型概述 +realsr是用于图像和视频超分模型,该模型基于Toward Real-World Single Image Super-Resolution: A New Benchmark and A New Mode,它能够将输入的图片和视频超分四倍。 + +## API 说明 + +```python +def predict(self, input): +``` + +超分API,得到超分后的图片或者视频。 + + +**参数** + +* input (str): 图片或者视频的路径; + +**返回** + +若输入是图片,返回值为: +* pred_img(np.ndarray): BGR图片数据; +* out_path(str): 保存图片路径。 + +若输入是视频,返回值为: +* frame_pattern_combined(str): 视频超分后单帧数据保存路径; +* vid_out_path(str): 视频保存路径。 + +```python +def run_image(self, img): +``` +图像超分API, 得到超分后的图片。 + +**参数** + +* img (str|np.ndarray): 图片路径或则BGR格式图片。 + +**返回** + +* pred_img(np.ndarray): BGR图片数据; + +```python +def run_video(self, video): +``` +视频超分API, 得到超分后的视频。 + +**参数** + +* video(str): 待处理视频路径。 + +**返回** + +* frame_pattern_combined(str): 视频超分后单帧数据保存路径; +* vid_out_path(str): 视频保存路径。 + +## 预测代码示例 + +```python +import paddlehub as hub + +model = hub.Module(name='realsr') +model.predict('/PATH/TO/IMAGE/OR/VIDEO') +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线照片超分服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m realsr +``` + +这样就完成了一个图像超分的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':cv2_to_base64(org_im)} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/realsr" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +img = base64_to_cv2(r.json()["results"]) +cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) + +``` + +## 模型相关信息 + +### 模型代码 + +https://github.com/csjcai/RealSR + +### 依赖 + +paddlepaddle >= 2.0.0rc + +paddlehub >= 1.8.3 diff --git a/modules/image/Image_editing/super_resolution/realsr/module.py b/modules/image/Image_editing/super_resolution/realsr/module.py new file mode 100644 index 00000000..3b6281a4 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/realsr/module.py @@ -0,0 +1,145 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import glob + +from tqdm import tqdm +import numpy as np +from PIL import Image +import paddle +import paddle.nn as nn +from paddlehub.module.module import moduleinfo, serving, Module + +from realsr.rrdb import RRDBNet +import realsr.utils as U + + +@moduleinfo( + name="realsr", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="realsr is a super resolution model", + version="1.0.0") +class RealSRPredictor(Module): + def _initialize(self, output='output', weight_path=None, load_checkpoint: str = None): + #super(RealSRPredictor, self).__init__() + self.input = input + self.output = os.path.join(output, 'RealSR') + self.model = RRDBNet(3, 3, 64, 23) + + if load_checkpoint is not None: + state_dict = paddle.load(load_checkpoint) + self.model.load_dict(state_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'DF2K_JPEG.pdparams') + state_dict = paddle.load(checkpoint) + self.model.load_dict(state_dict) + print("load pretrained checkpoint success") + + self.model.eval() + + def norm(self, img): + img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0 + return img.astype('float32') + + def denorm(self, img): + img = img.transpose((1, 2, 0)) + return (img * 255).clip(0, 255).astype('uint8') + + def run_image(self, img): + if isinstance(img, str): + ori_img = Image.open(img).convert('RGB') + elif isinstance(img, np.ndarray): + # ori_img = Image.fromarray(img).convert('RGB') + ori_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) + elif isinstance(img, Image.Image): + ori_img = img + + img = self.norm(ori_img) + x = paddle.to_tensor(img[np.newaxis, ...]) + out = self.model(x) + + pred_img = self.denorm(out.numpy()[0]) + # pred_img = Image.fromarray(pred_img) + pred_img = cv2.cvtColor(pred_img, cv2.COLOR_RGB2BGR) + + return pred_img + + def run_video(self, video): + base_name = os.path.basename(video).split('.')[0] + output_path = os.path.join(self.output, base_name) + pred_frame_path = os.path.join(output_path, 'frames_pred') + + if not os.path.exists(output_path): + os.makedirs(output_path) + + if not os.path.exists(pred_frame_path): + os.makedirs(pred_frame_path) + + cap = cv2.VideoCapture(video) + fps = cap.get(cv2.CAP_PROP_FPS) + + out_path = U.video2frames(video, output_path) + + frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) + + for frame in tqdm(frames): + pred_img = self.run_image(frame) + pred_img = cv2.cvtColor(pred_img, cv2.COLOR_BGR2RGB) + pred_img = Image.fromarray(pred_img) + frame_name = os.path.basename(frame) + pred_img.save(os.path.join(pred_frame_path, frame_name)) + + frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') + + vid_out_path = os.path.join(output_path, '{}_realsr_out.mp4'.format(base_name)) + U.frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) + print("save result at {}".format(vid_out_path)) + + return frame_pattern_combined, vid_out_path + + def predict(self, input): + if not os.path.exists(self.output): + os.makedirs(self.output) + + if not U.is_image(input): + return self.run_video(input) + else: + pred_img = self.run_image(input) + + out_path = None + if self.output: + final = cv2.cvtColor(pred_img, cv2.COLOR_BGR2RGB) + final = Image.fromarray(final) + base_name = os.path.splitext(os.path.basename(input))[0] + out_path = os.path.join(self.output, base_name + '.png') + final.save(out_path) 
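+            # out_path is <output>/RealSR/<basename>.png and is returned together with pred_img below.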
+ print('save result at {}'.format(out_path)) + + return pred_img, out_path + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = U.base64_to_cv2(images) + results = self.run_image(img=images_decode) + results = U.cv2_to_base64(results) + return results diff --git a/modules/image/Image_editing/super_resolution/realsr/rrdb.py b/modules/image/Image_editing/super_resolution/realsr/rrdb.py new file mode 100644 index 00000000..79e5de8a --- /dev/null +++ b/modules/image/Image_editing/super_resolution/realsr/rrdb.py @@ -0,0 +1,137 @@ +import functools +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class Registry(object): + """ + The registry that provides name -> object mapping, to support third-party users' custom modules. + To create a registry (inside segmentron): + .. code-block:: python + BACKBONE_REGISTRY = Registry('BACKBONE') + To register an object: + .. code-block:: python + @BACKBONE_REGISTRY.register() + class MyBackbone(): + ... + Or: + .. code-block:: python + BACKBONE_REGISTRY.register(MyBackbone) + """ + + def __init__(self, name): + """ + Args: + name (str): the name of this registry + """ + self._name = name + + self._obj_map = {} + + def _do_register(self, name, obj): + assert (name not in self._obj_map), "An object named '{}' was already registered in '{}' registry!".format( + name, self._name) + self._obj_map[name] = obj + + def register(self, obj=None, name=None): + """ + Register the given object under the the name `obj.__name__`. + Can be used as either a decorator or not. See docstring of this class for usage. + """ + if obj is None: + # used as a decorator + def deco(func_or_class, name=name): + if name is None: + name = func_or_class.__name__ + self._do_register(name, func_or_class) + return func_or_class + + return deco + + # used as a function call + if name is None: + name = obj.__name__ + self._do_register(name, obj) + + def get(self, name): + ret = self._obj_map.get(name) + if ret is None: + raise KeyError("No object named '{}' found in '{}' registry!".format(name, self._name)) + + return ret + + +class ResidualDenseBlock_5C(nn.Layer): + def __init__(self, nf=64, gc=32, bias=True): + super(ResidualDenseBlock_5C, self).__init__() + # gc: growth channel, i.e. 
intermediate channels + self.conv1 = nn.Conv2D(nf, gc, 3, 1, 1, bias_attr=bias) + self.conv2 = nn.Conv2D(nf + gc, gc, 3, 1, 1, bias_attr=bias) + self.conv3 = nn.Conv2D(nf + 2 * gc, gc, 3, 1, 1, bias_attr=bias) + self.conv4 = nn.Conv2D(nf + 3 * gc, gc, 3, 1, 1, bias_attr=bias) + self.conv5 = nn.Conv2D(nf + 4 * gc, nf, 3, 1, 1, bias_attr=bias) + self.lrelu = nn.LeakyReLU(negative_slope=0.2) + + def forward(self, x): + x1 = self.lrelu(self.conv1(x)) + x2 = self.lrelu(self.conv2(paddle.concat((x, x1), 1))) + x3 = self.lrelu(self.conv3(paddle.concat((x, x1, x2), 1))) + x4 = self.lrelu(self.conv4(paddle.concat((x, x1, x2, x3), 1))) + x5 = self.conv5(paddle.concat((x, x1, x2, x3, x4), 1)) + return x5 * 0.2 + x + + +class RRDB(nn.Layer): + '''Residual in Residual Dense Block''' + + def __init__(self, nf, gc=32): + super(RRDB, self).__init__() + self.RDB1 = ResidualDenseBlock_5C(nf, gc) + self.RDB2 = ResidualDenseBlock_5C(nf, gc) + self.RDB3 = ResidualDenseBlock_5C(nf, gc) + + def forward(self, x): + out = self.RDB1(x) + out = self.RDB2(out) + out = self.RDB3(out) + return out * 0.2 + x + + +def make_layer(block, n_layers): + layers = [] + for _ in range(n_layers): + layers.append(block()) + return nn.Sequential(*layers) + + +GENERATORS = Registry("GENERATOR") + + +@GENERATORS.register() +class RRDBNet(nn.Layer): + def __init__(self, in_nc, out_nc, nf, nb, gc=32): + super(RRDBNet, self).__init__() + RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc) + + self.conv_first = nn.Conv2D(in_nc, nf, 3, 1, 1, bias_attr=True) + self.RRDB_trunk = make_layer(RRDB_block_f, nb) + self.trunk_conv = nn.Conv2D(nf, nf, 3, 1, 1, bias_attr=True) + #### upsampling + self.upconv1 = nn.Conv2D(nf, nf, 3, 1, 1, bias_attr=True) + self.upconv2 = nn.Conv2D(nf, nf, 3, 1, 1, bias_attr=True) + self.HRconv = nn.Conv2D(nf, nf, 3, 1, 1, bias_attr=True) + self.conv_last = nn.Conv2D(nf, out_nc, 3, 1, 1, bias_attr=True) + + self.lrelu = nn.LeakyReLU(negative_slope=0.2) + + def forward(self, x): + fea = self.conv_first(x) + trunk = self.trunk_conv(self.RRDB_trunk(fea)) + fea = fea + trunk + + fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest'))) + fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest'))) + out = self.conv_last(self.lrelu(self.HRconv(fea))) + + return out diff --git a/modules/image/Image_editing/super_resolution/realsr/utils.py b/modules/image/Image_editing/super_resolution/realsr/utils.py new file mode 100644 index 00000000..e83aac69 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/realsr/utils.py @@ -0,0 +1,68 @@ +import os +import sys +import base64 + +import cv2 +from PIL import Image +import numpy as np + + +def video2frames(video_path, outpath, **kargs): + def _dict2str(kargs): + cmd_str = '' + for k, v in kargs.items(): + cmd_str += (' ' + str(k) + ' ' + str(v)) + return cmd_str + + ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] + vid_name = video_path.split('/')[-1].split('.')[0] + out_full_path = os.path.join(outpath, vid_name) + + if not os.path.exists(out_full_path): + os.makedirs(out_full_path) + + # video file name + outformat = out_full_path + '/%08d.png' + + cmd = ffmpeg + cmd = ffmpeg + [' -i ', video_path, ' -start_number ', ' 0 ', outformat] + + cmd = ''.join(cmd) + _dict2str(kargs) + + if os.system(cmd) != 0: + raise RuntimeError('ffmpeg process video: {} error'.format(vid_name)) + + sys.stdout.flush() + return out_full_path + + +def frames2video(frame_path, video_path, r): + ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error '] + 
cmd = ffmpeg + [' -r ', r, ' -f ', ' image2 ', ' -i ', frame_path, ' -pix_fmt ', ' yuv420p ', video_path] + cmd = ''.join(cmd) + + if os.system(cmd) != 0: + raise RuntimeError('ffmpeg process video: {} error'.format(video_path)) + + sys.stdout.flush() + + +def is_image(input): + try: + img = Image.open(input) + _ = img.size + return True + except: + return False + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/gan/README.md b/modules/image/Image_gan/gan/README.md new file mode 100644 index 00000000..e69de29b diff --git a/modules/image/Image_gan/gan/stgan_bald/README.md b/modules/image/Image_gan/gan/stgan_bald/README.md new file mode 100644 index 00000000..0345a677 --- /dev/null +++ b/modules/image/Image_gan/gan/stgan_bald/README.md @@ -0,0 +1,80 @@ +# stgan_bald +基于PaddleHub的秃头生成器 +# 模型概述 +秃头生成器(stgan_bald),该模型可自动根据图像生成1年、3年、5年的秃头效果。 +# 模型效果: + +详情请查看此链接:https://aistudio.baidu.com/aistudio/projectdetail/1145381 + +本模型为大家提供了小程序,欢迎大家体验 + +![image](https://github.com/1084667371/stgan_bald/blob/main/images/code.jpg) + +# 选择模型版本进行安装 + $ hub install stgan_bald==1.0.0 +# Module API说明 + def bald(self, + images=None, + paths=None, + use_gpu=False, + visualization=False): +秃头生成器API预测接口,预测输入一张人像,输出三张秃头效果(1年、3年、5年) +## 参数 + images (list(numpy.ndarray)): 图像数据,每个图像的形状为[H,W,C],颜色空间为BGR。 + paths (list[str]): 图像的路径。 + use_gpu (bool): 是否使用gpu。 + visualization (bool): 是否保存图像。 +## 返回 + data_0 ([numpy.ndarray]):秃头一年的预测结果图。 + data_1 ([numpy.ndarray]):秃头三年的预测结果图。 + data_2 ([numpy.ndarray]):秃头五年的预测结果图。 +# API预测代码示例 + import paddlehub as hub + import cv2 + + stgan_bald = hub.Module(name='stgan_bald') + im = cv2.imread('/PATH/TO/IMAGE') + res = stgan_bald.bald(images=[im],visualization=True) +# 服务部署 +## 第一步:启动PaddleHub Serving +$ hub serving start -m stgan_bald +## 第二步:发送预测请求 + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stgan_bald" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 保存图片 1年 3年 5年 + one_year =cv2.cvtColor(base64_to_cv2(r.json()["results"]['data_0']), cv2.COLOR_RGB2BGR) + three_year =cv2.cvtColor(base64_to_cv2(r.json()["results"]['data_1']), cv2.COLOR_RGB2BGR) + five_year =cv2.cvtColor(base64_to_cv2(r.json()["results"]['data_2']), cv2.COLOR_RGB2BGR) + cv2.imwrite("stgan_bald_server.png", one_year) + +# 贡献者 +刘炫、彭兆帅、郑博培 +# 依赖 +paddlepaddle >= 1.8.2 + +paddlehub >= 1.8.0 + +# 查看代码 + +[基于PaddleHub的秃头生成器](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.8/hub_module/modules/image/gan/stgan_bald) diff --git a/modules/image/Image_gan/gan/stgan_bald/data_feed.py b/modules/image/Image_gan/gan/stgan_bald/data_feed.py new file mode 100644 index 00000000..5626f02e --- /dev/null +++ b/modules/image/Image_gan/gan/stgan_bald/data_feed.py @@ -0,0 +1,59 @@ +# -*- 
coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +from PIL import Image, ImageOps +import numpy as np +from PIL import Image +import cv2 + +__all__ = ['reader'] + + +def reader(images=None, paths=None, org_labels=None, target_labels=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for i, im_path in enumerate(paths): + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + each['org_im'] = im + each['org_im_path'] = im_path + each['org_label'] = np.array(org_labels[i]).astype('float32') + if not target_labels: + each['target_label'] = np.array(org_labels[i]).astype('float32') + else: + each['target_label'] = np.array(target_labels[i]).astype('float32') + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for i, im in enumerate(images): + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_label'] = np.array(org_labels[i]).astype('float32') + if not target_labels: + each['target_label'] = np.array(org_labels[i]).astype('float32') + else: + each['target_label'] = np.array(target_labels[i]).astype('float32') + component.append(each) + + for element in component: + img = cv2.cvtColor(element['org_im'], cv2.COLOR_BGR2RGB) + img = cv2.resize(img, (128, 128), interpolation=cv2.INTER_LINEAR) + img = (img.astype('float32') / 255.0 - 0.5) / 0.5 + img = img.transpose([2, 0, 1]) + element['img'] = img[np.newaxis, :, :, :] + + yield element diff --git a/modules/image/Image_gan/gan/stgan_bald/module.py b/modules/image/Image_gan/gan/stgan_bald/module.py new file mode 100644 index 00000000..5d832857 --- /dev/null +++ b/modules/image/Image_gan/gan/stgan_bald/module.py @@ -0,0 +1,172 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
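+# stgan_bald: PaddleHub Module wrapping an STGAN-based generator that renders
+# 1-, 3- and 5-year balding effects for a face image. bald() is the prediction API
+# and serving_method() exposes it as an HTTP service.
+#
+# Minimal usage sketch (mirrors the README example above; '/PATH/TO/IMAGE' is a placeholder):
+#   import cv2
+#   import paddlehub as hub
+#   stgan_bald = hub.Module(name='stgan_bald')
+#   im = cv2.imread('/PATH/TO/IMAGE')
+#   res = stgan_bald.bald(images=[im], visualization=True)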
+
+import ast
+import os
+import argparse
+import copy
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from stgan_bald.data_feed import reader
+from stgan_bald.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir
+
+
+def check_attribute_conflict(label_batch):
+    ''' Based on https://github.com/LynnHo/AttGAN-Tensorflow'''
+    attrs = "Bald,Bangs,Black_Hair,Blond_Hair,Brown_Hair,Bushy_Eyebrows,Eyeglasses,Male,Mouth_Slightly_Open,Mustache,No_Beard,Pale_Skin,Young".split(
+        ',')
+
+    def _set(label, value, attr):
+        if attr in attrs:
+            label[attrs.index(attr)] = value
+
+    attr_id = attrs.index('Bald')
+    for label in label_batch:
+        if attrs[attr_id] != 0:
+            _set(label, 0, 'Bangs')
+
+    return label_batch
+
+
+@moduleinfo(
+    name="stgan_bald",
+    version="1.0.0",
+    summary="Baldness generator",
+    author="Arrow, 七年期限,Mr.郑先生_",
+    author_email="1084667371@qq.com,2733821739@qq.com",
+    type="image/gan")
+class StganBald(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "module")
+        self._set_config()
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__')
+        self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__')
+        cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+            self.place = fluid.CUDAPlace(0)
+        except:
+            use_gpu = False
+            self.place = fluid.CPUPlace()
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def bald(self,
+             images=None,
+             paths=None,
+             data=None,
+             use_gpu=False,
+             org_labels=[[0., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1.]],
+             target_labels=None,
+             visualization=True,
+             output_dir="bald_output"):
+        """
+        API for baldness generation: renders 1-, 3- and 5-year balding effects for a face image.
+
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
+            paths (list[str]): The paths of images.
+            data (dict): key is 'image', the corresponding value is the path to image.
+            use_gpu (bool): Whether to use gpu.
+            org_labels (list): attribute labels of the original image, following the 13 attributes listed in check_attribute_conflict.
+            target_labels (list): target attribute labels; defaults to org_labels when not given.
+            visualization (bool): Whether to save image or not.
+            output_dir (str): The path to store output images.
+
+        Returns:
+            res (list[dict]): each element in the list is a dict whose keys are data_0, data_1 and data_2,
+                holding the 1-year, 3-year and 5-year results as numpy.ndarray.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+ ) + + if data and 'image' in data: + if paths is None: + paths = list() + paths += data['image'] + + all_data = list() + for yield_data in reader(images, paths, org_labels, target_labels): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + outputs = [] + for i in range(total_num): + image_np = all_data[i]['img'] + org_label_np = [all_data[i]['org_label']] + target_label_np = [all_data[i]['target_label']] + for j in range(5): + if j % 2 == 0: + label_trg_tmp = copy.deepcopy(target_label_np) + new_i = 0 + label_trg_tmp[0][new_i] = 1.0 - label_trg_tmp[0][new_i] + label_trg_tmp = check_attribute_conflict(label_trg_tmp) + change_num = j * 0.02 + 0.3 + label_org_tmp = list(map(lambda x: ((x * 2) - 1) * change_num, org_label_np)) + label_trg_tmp = list(map(lambda x: ((x * 2) - 1) * change_num, label_trg_tmp)) + + image = PaddleTensor(image_np.copy()) + org_label = PaddleTensor(np.array(label_org_tmp).astype('float32')) + target_label = PaddleTensor(np.array(label_trg_tmp).astype('float32')) + + output = self.gpu_predictor.run([ + image, target_label, org_label + ]) if use_gpu else self.cpu_predictor.run([image, org_label, target_label]) + outputs.append(output) + + out = postprocess( + data_out=outputs, + org_im=all_data[i]['org_im'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.bald(images=images_decode, **kwargs) + output = {} + for key, value in results[0].items(): + output[key] = cv2_to_base64(value) + + return output diff --git a/modules/image/Image_gan/gan/stgan_bald/module/__model__ b/modules/image/Image_gan/gan/stgan_bald/module/__model__ new file mode 100644 index 0000000000000000000000000000000000000000..605addef56cab766ff83f7c084eb7f815a777d0b GIT binary patch literal 358737 zcmeFaYm6h=btafumGz3OtYlJMkIwF{qNuJWS;|T#$*jsOZD)5YTK!s7KhzJY8a0hT zkc?!8D)>-Duqv}!f?8V3Fdzun9|L0-v*R&f%|HL}j{gj>wk*u7uoy@)_So=qXAK)( z1D3rzyB=e)zt-Aw?!EE65o9Kd%&yElZ8?h!Mg(tM-g_S3Ip;gdxBvS5>8Z@mz5k{c z&nh#UhSAI(JALNT^y24~Y+ZL7TUw`Ux3%?Ux82h!+1b;l7EdWBPM^qpGJR=kaY=c) zWpoT%ce}Rs+Pfdrw0^ItyN1@nKlQ8vr=L`2;PjW^mN%3Oo2F%Hjc&(r^^U8pR@iM< zvQNT!=atXEdERh-KAoA%%w%TZOI9Nzxxp@}W}k*jE-6n$FZtwj2LEpguBs^W@I4LP z)hcV^TM{q%{%QOgcF7l%Cu2`zb~d}>$)AGnyBHthOBwt>{JQ8Ms{;plEQ9~yRnI9W z*}T?L9%nj}ga4aa{I8W_WMtLIJUeEq-8Gwop7IlMr^v)qz3ES$n3Mi6^O0X*Ma)R} z$CZaz+4B&CX{jZAUo|`A7YO~}e6_BmOzK&GjjSl=yfrd#@`6>hp*$1$Ay%}e0k^nc z^=5w&mdOQaS^ zc$C$F6A*0SYmqGszm1sSwxM^z<2nuFT2h`0J;1vktZ2637_KL~LZWU4&MrlMI=FKs z_~~M&1}m}3f4G#D`2FMlWd2u?apPEg(UWrVC&91&TwJc+xFwdew?&U-P+DJA&PN`& z#=b-n(^1LKmqSGIAA@^EPr50-FESBRi!UixVqX}{u()E#rj9N$nDrW;^#|b}bDI5_ ziO=>Ry8ugUPMJj&1Uf5oJaZh*mS*gx_d`Wz?77&C@t*}|Eb^d9_wz(+^nTZ4`Z6lV zBe%te3gnZwVUi_vu>K3mLTJXBfO(M3z!aQS6gCCNnNpn&tH*)&{v_D&@ikfr)@UU~ z)CMBm({yQa#nW`s51uAfyc@2je6@GOU=F!bPTNj_j1R8xwo&4&hh`?a89eP1S|hmo zq9=LlW8g{L)>#R!Hq;`4e5rW188*VJ$_X?y)>9@+{7$hkdUjK2jJWkfUq5h4zaQ8f z(P!tvJR(w}k5wJKBm=`l+Q)0&ghcmUR4u@^5W+jI7I4#vsY)}+gxF0GA>L|tRtYNf6V~f;V9|OJ5&?9)OHE}?pM@X12@2gVg%X3=SBK8K#De>ENiU?yId@Mr@6qxI%011?dWRpsGhmS<0tT>oc zxc>kcVrAIxD9vysSt|)8m!gY2-y&uY@Ck}&A~bBI@APqn=&vfDmprzTqF8#}ur$|h zX#-ruT*87{O;$Y8SNn!jbck`BC8~L%S~YDMdV5*=&NWYdM88uyZ*{lldG=ZKeo;pi zmZz|w5o3s0a&e5$o4U|RXlO{|yWyP}{oc50se}!Zh-x{8dYdb4u@2LW_*7%dcdVmh z@XZu?i1;YdhP>fT;lK?kU3Sy^g;Fj9sSMwiAXB~TQe>*9Hl-=AdgDxaTzMnmVExO%t3E%L`3#>4t0a1ES=S)TO7hlJ{m{GF*@7 
zSr2XjHL*+6vLdf998X*q2Iyc^7ozTod(+k7-gNN8yrf(XDM)|(0IF{dG#CQR=!-FE zCJcV<@)psb6ZEVQG!Z?BYCkZcs@}p#{DD9-dsd)t%!(D4Xw0e+qZwx*6nz-D#DjUL z#DKS8k3?1jn44hWajPK?;zYhy{-L8r26rXWjEumDi381nStWj*WT2TX@r11(yF6{{ z2RkG|czK&YMR<7zVH{SC=z+*q7ban7B{Ro|1)SosqxkO>JW%v^0*4S>QfA$iVn1zQ z6T^?>NhThdN_bjN;-oySw@|N&2+pm6mYKBML-=mmZxO7~7h}6!;I^UNo;XMxA`ebQ z0R_(!3+Z`;U!uDSeun8bsk+L!TMv!P;`+C{Wz|v#1lp|Rmh%_#<=Fq zWXdl9O9XzmxHbsy*@1#In8@hzK|gS}!Z^5xGP>$T5*9K`Rb@ZMv)VLSV{E_ z-;vP`#edj{UxemYNbL=_OM);^)PP`)FO!CGoDvNPVLIO$0_g^LinqX$w?v?N;_zE^ zI@4BODeiTzL-VIDXPXHGe> zX+ZWLBm*iipVyQVW~bM8v(G^2?MdZ)G+~EHVGzWS(FT}o+z^aNC$WB#_hztdlH^9j zzCyVXmjz)vZdbA((DZ?+=HumwLxhFn!MoDB5t%8{TJ#bXh6syLxY2X86Ndo5tmkP< zf>R73E;w;VNE!$A{t%55z%lXPGy-`H{7r*7PDlguq|tyhu&AvFa?vy1xaE+%5@B&j z(*344t5NALL5SY|NSl1`_xFC*G-p99=9pjyV>dZ(41XhOEZ(ywk0r?ZiT*tB9o5~s z?twZK?nF8=#U30UM+9fOLxVHXn072VSS*?$_FpuV$0jdqtANb$Y?bIX>Bt@`!(Zi0e`y|XE zP3Q*e-v0_&KHyix!Gf^CIZ*Hg-w;nC6mF!XD54rAST+fk2+tb7IT&*szW;{W?;*W7 zSS|*tA`Z!kryUB(1q)8vKC7vwun<}|huCD%)yZZyygEUp`k--iNx`YByWKH?IF?qp zc!0QGMiW>(L7Em3>JygXWk$o(u;p zotPHxl1z^^S+t;I8Mj{}E=?3BNj3n2E#pt7P#%es84m{s-UC}gLzZBIxRovYw0i_2 ztqy{pQdFz%eX_%wf1V)rJFDz3}I3vQq;3x2++Yz94_Y}%kT>kmof@g)r@|3A+vXO!tXSzdXz zpd5c2oXk-A&m#LEKd;Podz#$^c?U*R<^&u#0|%PMeMoW49J@A^KcgIPbS-c}kL7== zC}-hAkd79VnKkZZPN=Wua<7_}k;|7qa17fiZ|U8g{*KvcmA}+|(6PFD(<#5_w)9Tf zH5_-jx07GWS!Tz`-CW9n$UOphXIxOI78#VwZa~N4OGR!=X$!Ww;c`Ab&ZbRHa7B_CJt$N^?C10pNF5e z%gXw$LwNT3&Gi?&8L6@x2FeIjq`?)EI)nFDzvo)tHM)8_%x16QiwQKht;H#2)fBXa8gt;{ybnScL0ao~w`6I*udLv98 z**C^NGP4^RQl+-aXD#t(T*dMAY@=s%ni^JwYTcewZ0lx6>)G8FJmOLg55otr88uu* zFq_+x9&=^2#(wGe$kg*VUfp$VXXE1n2!X;Ea^^Qn>cjC5DDmgLwR(%MKyP5(p89Ms zB)-~~TZUWAvq8c}daK(pN+tEd{*174Yi-TjBOTL)J@R=R-e%X%u?Ip4=h}J$#8l(n zvayY?U5$y(V(AO{&oF7qe&t5DhthVsoS#!p34x5L-#{^rK*8R{_%w;}(1s;3j^aOA zSdtjekr>~TIYDAPMU4MW#2P=#t#Kat*}DXvA_-0soDA`}FGGfS;Czql5RwE>k>LMb zMghJZ)zls3Ddi+!9%0k;N$Ckp#Fj5cIMnfR@}^HJ??^ z@Bsn5<2^;pg*vO8Y}ws@&w%w@~ z0=l{4_uY8k?i(fb-Bg!54g`i!wT=;n1O*HM2XUQiuc=RjzeaprNqy_lzo}BGFu(mZ z*mo)4^5H1oQU{1q6XOq=D3x35?Cbc>RxjaW)j^VI2wQCUuS#k^^-D(<=$5{NxNN5r z9?y++j#7yMUBLn6L)#+%gU0DNAfE zfz;OAE#S-8>RL_q@sD20gHT7=&&4>C{#TVNzB-f02Ko219jela@tdEo-zOh!N+E}{&*LZGfmTCTF1#8UzuK+ zhB2NNn$J4|`V+-9 zWrb<*Q0MdbJBF{7jB@VHE^>MG+wU74r)w)$loRY}QttXdc~&|772P&<2oz*f>Cco! z3eVabp+lA^tTpyvN`l{>gnlmuDLseg)51@~xPElB!Y%*h?uRyvYa12Ez2cTEIcJyZ6zO zhfop|5iFkM4Xl8NAs9L-%^tHjs_Sdq1PXSr`ZA7@&09V=ic^K|le5A9@glg+2A9AP zXXF7X_IGmQC(Gn8M$=?VIfrm}-qZ|CQ zCzNx{q<8@Cap2Krlv75}F)c7frmz0`j|*#PjkJYEheMm2n=fK?Pe=e}&v?!#Y+6{} zzBBq8a|-CdJ4reav?$Bu7)oljs01~}GM4cFl5Ql=R!Sq;yXL7@R>Pj{O%&-|D>4UR z17Vy^z<9cLR4*1nxu5%|g>J1^dAvT@8tNBt$ilzmsWHpoD`P+uy$ao04$duZgYrSY zhd)b6{q}UK5+A`|?{vz+*OmidEaDwY;}0c7d|Y6ogDj1X0W{$Z_Ol?d08%z4rRGUZ z9yB#6tH8uG6baA;T-)>c|NT#IzYxz;`ORIx<4`K7lstHRr~=zPaxO|32suYfhLCea zE+3~%QS!ahQi6y3-h`aXIlIimC&hcv6z^w5Xw%c8Q}7=l=V{2fM;)DpoQu!*h@5*2 z1cQ+Cpo%yVIrr)%(~7Amp5ob4FLS&lO{YoD*^mBxzI)C-cfZPLMz%01zubmSKYf zgPaQ;n1Y=^6LOwhvrWi(F32ONobyrXDk=4ix%3Au^-WfRiHdU)Ip@&UDmCX^ zR$#kF&ILy?A?Jjg6LL;$Eivxvk$se;46q8ZwS;-aN1ofHkZ_gQT8OO$s#7&&g7O+! 
zq~)B4+Ue|ptwmflkgX-)`cFj8y$&(y$a!dJ>BzZvn}NtVKWP|p9t>DU&iNe(IVa?t zkn$4LC$L-_6yz~h>&wa&Ivh>vXl~A%NT>7 zq$Urfn*0=z^VI>!xvao;kDQCv%Y>X0a!$xOA?Jjg6VJJz<{cRWNJ91>xh9%&&Uc@4 zE?QBi=A4JxbngK<_ZnOe$~hP3q~)9oV`Bi%d1z?q$hmi$fylYnL3=RIc`#rZIp=pE z}=ha*6FA=z=^si3R{D(fKk3HB2cMX?S6&RC4YCvzF3fgVq&`U9SC5keFRQ@Bkn>)1&i~6G!&vf<9-bc!#tduR&U3^H4aroqp}MckO>q4Knw$jmQn&#Vcr#`5ZQ{h_{x?nVUI` zEwt=@&afOK_fY-T__u6grgG)xE#9Caw!qa_LtQqY>1?nqy1n9U>`?hSYXn&=gg=8V zQ&`Gfy9T`-b;~K1K=$n0gVd_LUsY_d>ScV80!}Y)X9&$Uf;ZG?!yrG+)v*)l7m_;) zCuyY_ecQy|#6S1`n~pcv_^a8}8YbBx%09uh&z!TR_Y7a>oM3Nv(Ka!bg!TI*;tzy~ ze=}9Y5Ab%P{g*4%TdbMWYwv!b=26aJ-!mh(kz;*#p{-LxcZ(05yVolpK_jG+`u3xL zWzdHh?#&zJvR;7jrHsdKnX8#)ws?yVOLtd3+P5ajm1?cV=7W8?`V!6vYgbrc2S7E6 zPK6&A-oTcHpbFT5Zw5UJKQ2J0Q#cOI-AFf(Zh-wXKsWsAS>>#vywYhpqW>>+1Px?p|%)W<) zz0eU0%6#Z8Qxb5-S9Gf{&hX{_rfFFkY*-j>$JInF;No25@Gj-Q^hEzRC?I%7aMLZg zedAu{1bX}_ka64|bq&W2^|<8A_jw+^ctJIB;W83>RkxbUY(|P^8^}GuMm(Ykgd^iX z2h>U6ZV6X5>N#8x_i=%5RF+pJ2?@DI$LZRkrMXt+4nbtks6j*cplNwX(k|tg#kIly zf@{#zxDye%3G~4IrLi`X3>;e^iY-H0nA|F@Jrx5ECzj zk6_=Sa*147WVb1)-`U59s8pDDCXGdX5r+>tx2J*Mg`Q+s@)hr*_gXMJZmkL~T+?hX zD)Z@h2!js0Cx!pDq^QN4&y;JC0>TDIOaA%-AEMtyuiyU4`OM=%X!R^fhxv&Ql<5D!jnqSOJD@mL-@`7-gzf;iBFa6viaxVFZR9lLg{Qp^AEW3w}E zi26pdGYTM#5G5+9U*5H8^eH#rAv=TY46|PD?YPLs>wq7d65b&@gY1l5tr2Qle9*WX z$7g3`qISmg>KM(8H8L~MxIjpR%#2Q}9LJbKhSaAY^CUQNe{s|3IO zD?=kPyiM`Sm?AR&oI>;zqMuxd{;@G(2tE&r!A91ZM-V<=4B&Gw?e^!>+jHA9+cR-A zeJUQL525KNBs3igHNt56afngo|N5Mu^^GbMA@P-`v3?}2SR}ESq}{i?46emUF$sJi z=NlZ5Vv_%FmXZj-Y-6SW`_TFJh=KdJsw`S8k_cEC2b8lpkjM=Gg9QAIl@ew(D1!U|_GE0~cK^Z&*& zsJ{q%SY*e&fTfzbmeDb6-DCMrsF;c9-jHt1p65B!=eX4HRqjdtUe1;qUXh8#Vqfn= z2A=uQupv}GmH$KX)W}ntn5VWkwKsvBU=bRYa_MR}c`lt$$gDt=C&uw3L3s^NM3akZ z?n0#FBPg~B<=PQXPD+6O2zq!@lCrEy5;absU!?5pUzt}5#+jfLo!xsY0Ib zCo}o;%KRqk3ccZ)_jT9oc3AllCJW9n|l6)Hhtlcatj&A4e=Y#81-fR(b5 z)D2hnvXNt%%TQm{vb+5r5+pEvEZ9!iIB|-Y8fDK7YVVKcl{~t9gSD!183ET+yk z*yEs{2j8U<1zth(%8xAlgpVhS&!2OK|2Wo^TOlJ^#Ftpr@x%j5wXjCT#kZ1#2QOMv@iOFqtf zCG~y+=|!CbsIlxsru+Aa%&ixd1!H?l?>pf010G=Lc4Lcq`A-5T6cBV6P~;D!mW-6v zqj4ST?_W^n__YrVv$X|eCo>yUQ=hyDgPjG((eA=sp#w|i1ZGM_9rPH~kOE1-kH~=l zACnjoNlkL32OHwx=_RfNG z+S96@Hcdjc6Toj7G6&VoSv)p-wJLJgKt-n*RS0gM25|lN=AkDV;M~Kk3MGd~_W)a_ zo_&sE-9zqlh|uoeC$yWiJ85_G(Q0tx+TAY*LhVin_oEq4G6cWMEl7dDcYEq<2>J-p zQAW?**1@mq*7+CQiwKO5rrYzKbTjp#&eVt%?jnS7^59z z&eWk^m=z=qGf^l?`<~m5r%X{=0>26T_QCH&s9Q$F z2SjOqIJY~kUdfiZ$}SFoDJV`m)D`4iAVWd2!%mENNOd8_X(>)y9@&~Xf-beJl@gWL zp5wHCF{>=0Te{oyA>`Bea9A1bS+46&xX^NQCCZC&kO_%@cA9uGjtU|1{^;FUVE9=~ z^0ACf=%tHAyAT@)$_#VYL?y5K?aB4jZR_rq2Khui-EC}ztSqlVu%@v-_L}-6jx@&g zfRqp5UQM97_z24TMCqQS1fV}WBfkw;>*MtFKaO)HFo7S&4vkJDfh&QF?>Hxd*C>9h zoCv>5oCuV1GtQKobzXnqhwnaz@+#m&2xKhrBJ8G>E2Z5~+D-lkLE6m~Kkepx49_w< zo82Tv1fe#hd>i1+h1_JwTbB6u@J^IyGldztL1`;1{~VIkN?Zt(XrsQFTF1ZpR;c^p z0|{aRUsF?}4JF!8qD|M%F{c-5dUC+^-hgz%#=T`@8(%YYz#|H#M4Nb`%@601&-cy% zdjgxUeV9$OTT8{+t@p<&_%(dBDO_Vm^Sp?vaCh zM96p`2^n_@To1^p`N7-}1zyQsVmRn_%C+USvSrrGy&ad3aMp4F*o0Uhnum8eFVJ$!aEti^ zsgRO_rU~Rcd(NEs?Kx#mnPp^uqPysIbUSyQ;F%3)$y63kI7r5uzo2Az&#F!6C8x1t zfcawk9En+X22b=z2G2qNa=Zb5|2z4=B^^gPZmc>k7Eyf(RSqYVfjpIBC3ig+D!DHC z=_){}6FLruJH`W`oZjqy8hI=prH-G+vVu?H(Wb%v(QU15fFfuXkwk3?NQ1h%TkN`9 zuo;2Tx>QnsasRV#sx{s@hKU0hq3!hRnpzAE-TMSmcZx8~rQG$(GF)?CxB5m&{e@3q z$dxtLA|^C2HH!nzqtF5>BK(Jvg}iLK%B?jqU$8YySJTwb<75H*>#ks6iDMzr35)Te z|Jkgfpu%ElK9xP^byg2gUgvkuP+q6E5~-fz(F3Q|TZchcQqE_m?3nIe%HgYrXc*_tVM^&#P zxagIbvTh*|I|lyjBFFRZR9J3T`E0^5Sn&YT?x6YCeCcmt&srOvncD*aJ(7QGC9j0hHewqh7#+lAR6dtp*5sxMDg^$G|dQ4Sur)WfoE)Ah%%C z(p&uOpJiBD5!ypSO)Mz$?*(s}o`A%ouV8GSpOJlri~b9nre$G6QW!2Yd#iZf!Cd6{ zE_HzPNdEvz2=WUcNy38rH|}LlsIR83_jpkRF|Zyi2G&o-SMYmAqOWicr5Q_L*??S@ 
zlKQRGwKlT&7mcZ1qvLe#5ZldKl|O!D+wdlq!Xx1SfRp69wzI+hF6EfvwZZ-ZyMIsP zzC`3Elg+s8yQbxYp2Lgaf*#YjRmdL|a!jyMn7WQr%wOLv#6(QtBXH4B!9=bsvfGr@ z?@TVENP37$g=Z4Q$D+Q7!w0?F!`8^?P==*nkvXviv*XsP;K?=3HY9U(ZLozKz5d4g zHs(d`=aLexW@L=&MSKFsg|z}*I-4eR=oBN{nb2Y<9*={@<1q_)y{P;%v)h;ie8ykH zEGsw$=fTNMlscKIOULqn95V6AGJ>bDGx-_E#Vkg!8z8cDtWwMW&G>8$j~0q-jRFVJ zK-7OZbr;9YBL_1HcqIAgG2D`^0Y&~%#lRY7ldS=Ak8F+947619m28bIid>Emxg4LZ zk%`(G)9d3jHExLNUNSYv)YxO^I5IWJ)F4=+{0^^m-ec*r1AVmONEw+LWNM63u{KFl z;}>{gb`UaAdQt=>n;a(3ri4(W;{xuGjtkoaLpb38jTf7bczN2bu^KYJNZ^%D}d4wTDbZ2dSyEc1VJ4tXZ; zWX~rGedTGa$hZiN!rKt`d0;{|0ao>|aFvj~%p>}d&!KgWpYS^$zVIOT)cFLG;wY>Q_90-BP^#kOM);w2fx2Fqu< z&DraIP7z}G$K+V?^!%hg-_xx7uLUP6Be$pST00~`LUee-ZuS0Yr)n!i(phtt$u zU4yJlXq^f!PUsz}svfB-uT4*aha)lio>yi=%Lh_)XWn}G^{?hHDGRts9GFeO2+fCv z_NEF6zMstG&nxq+GaR%8GVklI+3mnZtQd(MwhgP#`o)2!MAy5~2REFh4PQ|fgeuW= ztJSqlcdHGTo_^)dJGb9>`HlAh3_S&SrrBg=O2<}?fmS*#u3%M4P}L-RuXhEM*yrt6Fxq-*e!Rm_8O19zN+YYYxbs8`Rz(%_C3coml_zxK_pX z0UxI8?RxfEj;;>W&4;MZ&z}>xRU4nr|};3UqUq>>rr3TEJMGyqoH9V;-rIBcpT#OGKa?s@&1G$ zUec~()vla>!uPtbpbH%Iu7_Rc8d?yuTv7?uO+Z8$E-mZ2RgOvrv_r4q?0Ov_5UFqL zW=HGUT`0+IFE{$O4fz>b9YU-3iX~5rg95*%+bz)E*RJsfIYhQG9s^Jy0%sG)(7stU~kBI)MV?UBfY(eck#J;6B-skMmwhyJo z@hqjtYYsHj-@l;D@oOI#W@`(WPG&ZyrapNQ1}k+PIu&)$xr6UR4JnWW{D{=UK38b( zXo&E@N-LY~7GN)CtD`j_lcTPeSMa~u^4EGTh>YRS-s<(idw3NRIt-wbU@!>0r;?vN zmi+7oPQJ#Kim$Pg5%~|th?7@D%R%mZ4nWEGi%juG!1RcHZ<)=K55bwx@+b6eiNJ9J z$00hv(|-~|na?Os1PViQw?I8_b*&~-Xg_)>AxL^vx#EM~THSzvi~*6_p1Z|TYjK+} zB`HDSmz7K6I_o6~n}_G(omtpOGu zF{%*UJ`Ldd@69XEq60k4s!(!>bPup)>e=Tw);;7-hY0QdeL}m*A0MYbPTHNi+JE2} zcDUEgqjAm=EOF3ixDqqTWN2bQZrN|m?MkIrvddgy7g7NQ2!^-XU9%~Gg;V0Kj4b>; z<%5wXJlLABT3PqXchIfeK(H2#d8B)^`mhIh`th>Z1BtEf#b=Y{-sEueX1~K+2O!Su z7`E<4Xhpr|!+4T<8=oW^8U|3AB}fbEP<(Er>D>4@e+Rth?Qq+H{M&sOj&T=)La`Kz z^+U0;PD)f=60jR{jHMEr{Rz zs8~vbR%j0FloN^2Uh^Zg;bfxN4MSRoLbQG?cZjY97@Q;HV1;_n_`u}ul>GKY`{aA% zx{~WUPS;h0Cw-kwAzBL2+WIJV%MmQKO{_LO;vw2Um{+dGbCeU!3gxpHqa9?<)S+JZ zI3PNaeCMQlQIz&QY?J^zq>dXZ8s+ZT^SpDy3#y6G8tbT_>Q-}^HP|YeZ77<@{3*)8 zvhQiHsn6jg39op=^Nw@oCQm)H`#A&h&Tv3v?)On zT}c@c9}uPe;oR=DdL>)tD!VuUrl2@&e`iRx?$PUZbU1Y(#c3%{TOK(n>Ik~jvQ|n| zUVDzy{>7}agl_3>(}$2xuIo;?&};L?;-TRl{@H2b9)|D}>ydrvxrg^f z@5Tbd&!UrU8Jo~V7mIcQehA78bJqZ)z+jg8?a4LM1yZUl4f2V4y4%O5ox< z&WW%V3Vx0)<>q&Z6M@2aV-4T&-Xnha?sF)w0#1ZL#u6{WR<~1j%)wnlDeZ>RZt_0} z(r&K!X*cI%cosxYx=D-(LTyO-Ho%(;ku1ntmiYJZP9a@Gi8hpILtF@lFAgV&34BdW zi8hpILy0zBJI9<}sOiZ8gLDHJUmEw8jct6*&;jB?NaI5I;XLyB-Wgy|V4t-Qvx$~V zJ)7g$<&z0^4YNilr-pKBBBkA5>OSaLUA^g)`NXhHBCQUoMCE8+%MA`G3KILnm{UW# zoN{U?r-pKBdRj+s8&0F!GoadXx}3njG$5zu2XjLdcqMz4E9uJ#N?LRE&X&LpX7IJl zbMSXmEJA4WIML?)*L&4SlNogl;q4@z1Ka?KBnS8bF&qH&U0Yr&TV}o7+i?jA??EpC zi2tfA5Y5B8oUVAeWw^yWJ95yF@T8#WZrbY%V>tNjIb}|nWhIU88y%->E7#HO+;xIy zBunPz;t2=Ic=H#O4DVUB3BBYrb_^*0uzilitUH4zdX<#tpjA2EfWQBp{NIv}BONzZ z9T$tJzJw}=6UsoIO0kl=o(q*+m;7{<+~>o`;c&-z0F=|4-A^Nr#iP{GygZf_dC~l{Lo06dIVC#R2D0XaN-w{zJ(^UN&9j)|!|v z*czs*Y3k>3vVi?{S1_=|v5=TO6f3s>vspz!g~if*Ztgj+bM5ftb$<5@<#l>1k@7kp zy@0WL>oDj_%K03a7nPKg`iNpYQEDf-FT-;_|8aJY-Uu|XhM3q8KrQ03tdjzu;_@)Z zF$KarB}xilOYiRRVooaZ+Pxh=Mg{P1!q)v7TKXSyOMg8g#BW7~xZX@?I3+<(Nlact z$iIMSllL+bt5OuU!Drfzhhio2al*lEPT$Gfrbn;c_O2x>o@hHgViNUTvf_E2 z9$E1R*oyzjEac0dcmrv@x$wS`eS$~6=bSCQXJ}Os=sv;T%0U7N7>Z#)e@Jy#M<+_a zo6Z3_s(KZ{MX$sZf$n999YZl+k>mMyD_9j(Qr~`bAsTdfgWL14VjXKsB|d&!&c%nN zyDJ~5`_+Q3QmyeRVVg&N2`7aS>KE7nXshTE>KE7+gm&iaK$uv+0L3eC9OjK!^@&yA zXVp(Mt&srOvncD*aJ(7QGC9oHRg)Kvx zM#DCsE3?L^7x18DXG3}`q*CD(({VZmmicP%n=L4_jBZrhwDcA~`)3)JR)qGDP!kKv z{CmM$rY9ir=qni8=VxS};iCV-rfFH&kQ9asJ?Ja9gm*9(IlfCBAU)DQfD(fI0!Wgu z;Qoz!nG@=(sp~zC1rr0SfJ5UG^O#nPd@8;IkD78^3F=pYhi)%LY?znr>@ 
z2Ur0|kKvYV4Jh)DDh9GOx;EJwyQxQV63kb!H7NaLlr-`Q+8Q^9PIG{ z)>M>%qq0i22H6^fYn0#N#m;-IeI{F@GrP)EyMCnN}lx%XKJev|k zi68d>59zotPB4hGix7>+rahhz&9742p5pcsjoUvqAsX^=1N(R609aBDTXlV%SDWj- z!^j4*7~tta$As;S73ttty4 zi_C({7k=bs1aY^Do+kZbLGRK}0*4%{yA3?!QDABH@Qg#%i-|8*HDfggY+cL_VR^M!vkDxavB`M1) zBT=&idPGXk{*`&GOQ5Pj26(pv9f^`U9SI$hE%=>{o{7*D z2UiPalM(lV-VPn7*L92py5y{Kj&BWU&Dd;0lkCiK(3#36Y8&0^ZRuW?*Dn+As(PfVyjDF49*#t+V>ooO~44jv(VmDA<6fXnf!TWo-2WdYu?vgv)f_xD3~dXy20-m2M6$dqYrL4 zOC7$VEC^Ks3Z&JwO?Rsem!5v*&O5i?c=?U@!S^`@fTr1GrAo(Ej)7J>Ev{gdN>JG( zsb9!3ZkiSwhIbC&qHH8}!}XO^ENvMItXg)r-*e!Rm_8OX9zO9gYY@nu8`Rz(%_CRk zomda@xK_m$0w1R9?RxfEj;;>W&4;Mh&z}>Y zaOeBu0z&0z-@y3TUb9YU-3iX~5rg95*%+bz)E*RJs~xX!1o9zCyhN$jlIT0oxa9SM&v&nBTilsZ3nsYIRGW! zFG9r|0ox-;IhENg`4F55ZGS@VmIxdta2%ooJpCsjl=+PE1V@fhVQB6asOPP&)nt** zAH9?iB)zI!@j-8`ZU9w=0g>9CyTwv#ahtGw90>})tXvY;IcBr3GYYMa0f0Jy>6pN? zpq%DB@5blwumtcv5 zM#YurN6OH|fZVd*n%k91uVk0G!Y-r&3J?r$wYz3h01KzYTR9^C_vGi2COp`huv%I7 z%6CxlY#>;R#ynC!V&dyEPd{E3dmyphz4&ah{F@wZ-t2dn>j31L9mCe$2(_r!fEZ6w zZ{w3hL&E?nvjk~DABxY7RGk|i=kI{`yd7>mkbk@H!ZGebP$-r{v3@94)=7z~OG-YA zImS|n&7VysKzo5JyLo}LS492mB1LG4j5eH36uTn@X{QiW4={Iz zOH+`x1qt$ItD`lb+`g`tSMa~u^4EGTC||^!`c|(GuE?uUb#DOgKST71270gUE-I`0 zp|nMS(pTCPq@^J3Xb|}kEVfOoH$CD(+CP|AuEw*J6U_?cvlyivWY5&0V)!^ zmPfW`j-X2|Yo$cxwdY9fU(PB^=$7s_fe0BjJ{-)bQOVA6U3b!jj+>R0#Y00s{Iky{0}F{+dJKgvLU{&*B4G#wPUA z#kyU+q%1@K}u0icN<$F$i(OE)mSfkO??ta8l!ta)(7yf zCQx8}1OQ zBT)M3IMYu>5rQAV`y5KFfEXc=vP6!soA#kpg>cZS5GIg%^Dl$cn=5|m&G{Il1twt^ zXc6R`8=*I(oEu=zg=iLJFH0Y_p=2A1#dUIr*bNf&1iq@KWE)Dhp=6t`onwA4 zRQBY6N4fzig^hd5#x}lY00A)~q%k7=a2{EG?+mafltsib6HS+THplVHCld@CW{pr@ z4dvBD@-hZQETg$Chab`;U0xyngR!P95%O@*kcVAj*8}ouelRygfmgCuxstw|prkcd z?`#R&UJHQQ)NOgc85YIukQ?4zql`XSg z?(Mh)g&)RV-br!O-89)5#&ZDapL5DAqx=(0^enWx>m<);rc7n=gaa*kjr;{A!&_Hv zLQ6S~9fJx>?4mOZv3a{|8QOi_&YXZi?+l*kaSfh>mgRT@{{DCJe@jY^l-yXA94B`3 zf!dc)=lF%J#Y*maE>w73@>5oFpAR30!yV&+hQXarBbUWv*3rCNmKA&okJKIZk8W#i z12jRi=m5q05^x7~b+_1cw_r2EzJq?*zqtPyIMo`@hGOCXMrb?zx~3LGL-#%b5mAI; zF6FLQmf@QFy45#I>Mwi>L$0hbI;POT)GQ7-k3tI=iSQpv81gdeD!10ee8JW*T}@Lz zkCO!?u)D&6rEqVe7Z#&M|4*~ZECQuG)u*!e?9S@p$?p8_8OrYT)*@whK6)Kv_10m~ zmX!I~DLbaSm#Vo(6y%9=JIR9?p85InS!Hhy33RfCu-FhxE$X$@NCQxjdALZKf?=K( zB@M8pcLl!>6?^U8lpmu8_;JL*ucEd8p|JLAQ8~UEmE(G|DMw8t04k~T(hwf7&nR=8 zQbY7XXI-c>$|;7TX*(X41*aip*4O)b$Avc5HlrAWgWFoDGfec_ZSPvLf4VlMT4Gi4c%2)scRj}Orgp(%Fd6=x%jYj zcjY5>zZ%k2sx>|(Z1bou;iNEn{Q^4x{GA@Xe&G$|*@x!l>_C`izW_xna2)21NQU+T zmmG2iY87qPa7vp&)^&9&$#Pu7TLLxd6HG>L_(=)KY#L6(9hT~_IlVB@>@XZ`wnaTqk z(KH&ip#xCCSQzl2WM@N~s{w)@u9%K9Gq4O;gWqgHnPp@u+NPzq_}M?pu=FCdheUeO zf-?VJ@RsQbNI?1uhWPmz*=M-uzp!aqtUW0V7dp~c*MxU47dgI59Uwi@KY$W~Gy}+$ zu;BiUdzlmJtEuZfjt3JP>s~3(Ki#U4PsLa8J4m9ha1Nyz%VXJql$MhEt<<$PviKJS zsjks+B5kkp&E!^n!1zpvCE-B$^M#iXK#3yiE zSSu}CZ<_UH9+o>t&y6BmTJC|twAX$qhym$&eo_^$7pNRJT6MYH3~dn2X5t;Q$2yxcPwg%xE<#%||^B!xU9cZc*hswy-AY0?Hvo*fUOSFTaiPDo|DB0vdc{U}8 zA{`fiNBp>FoL~@V7a^J(V}(tAmE!gkx1VU-{;>(sklrD^Gm73>4Dj@zdBXPGc5XWt zhu5d#k^2z5enNuRp(-K_uOEk?W&UU9l&7%LI<8ZM)K{Ly!i z3fBqQ%RH(d`Mlk?{G{JQ(_-fnNQ|S*f+Nym=hN`OZ&g_cnI}1`moe(v?HCTk@8N&Y zNo8ZD6f}gGV5UK|6>DW}o#%0d1psVtOsny^Nwn~X-CpswzGK+08&=P-i-quKa3RFG zuU)gckfGxgOTbt%x%hVMLHsI{@PN8KKGOv(>1yAznw<`0N(XN!VVV-ASt41n^aaSO zOQ7bG>)!+RNJ%+M4Ue7;I8IigN;afKr9MYh;gF5B&(WWdUq*h}B>l3zDYywVYfFno z)^2irFH|qnY&}{PAW4h@RR{F%E?zv3kVrwT?J|y2)0=FF!_BImh<~G-_-%p=WmHIH*l!6D=y; z>TT&>qSr4I1LV=sHv7>kT+j*l43J5!dY06?qeP(VSVkeb9ECEMtGD=bdAm%#gf08c*-Dz9r#f|DcBtDIM6L(2y;b!Xms z`Sq{nFDVNTy7oN>W)omSU`Ns3R3XpzlbQT^Wu7a6hHKu}U9;O^6e*Z2jJm-#tUe=B zfdlxy(FaeQWe;Cb7KADR1=8x;rn}XKOHaRY=bhVcy!^)d0EnIfOw(+#YNcZ<$3QEc 
z7FV!hB`9u^^e?0tH%*HT!#f9{Q8tpg;rdD{mbnZyRxP{R?_pD0rjSLaXEsipBGxOA zJvXSmKbl9<$~&=^ao)h{q%DToF6%wUwa*L~fOGM4e9bD9umQa7Za}>glq+daKe|z!oP8sg z0##}^7)%}=BDZh2_x*7J(Q>qJV0>(_1mY5iJ35Ffx!!*k?^^#QRP(uz^9)_V^y!5hr)Neub>YcG_!|o=NeiNw_H*Q*G<4g z88R&^J5hBebREPhU&Gn;8bTme-`35J*0Z}%n%iD(^lck*G_*Q|SML=|o*D-YeoePq zpuVqNyY~QJ5$VQwNH-+EQU*l#itGN5K_mOCKCb&zNVmNEK@F6O12yk0+*(QP?1ko# z+S!+Ur1whd4MUMd0FGtPIo-og3{4*{Cc0x0xuiQsX9$00tzbAS?pna?Os1PViQw?IE{b*&~-Xg_)>AxwHzx#Gj# zTHOG?3V_G(pRuK_w9F{%*YJ`M2t@69VLk7<}yq2v(j9$?JWv(Is~d&r#*5#Ig# zgm;s6C+%+5ZQb4(x#TmN>Pv0zsm=Y7&NoZ+BV}x2KziA4&FxC1SF+1oVHZ*X1qg?? z+Fi3LfQ3`yt&A-EJ^8t$2@kd=tX9^&8Xj~jHxR8wV_s?`z_&yZKU>Om^tQ2qf1nw0 z!P_pACE?_F^Jc%pTnAv#>=?H0M)*a&X2f`sdK;f48XN{xndL|e4pDq=*W>v zueSWPUJEK1F{{4S>w_!uDwN(EK>p7VeRxm&j8Lam9>72u)u)x!{U~P=0ZLzKQ;?Q| zw4*`fN3h;DvEuZI2WkIcUb!02Q%*E1l+R+6c91<&hnnHzfayeXo|EoHaoYFXmfitI zQpXJyj>Oc}{Hnf@&hT###@k(0O2)_1Y?$ZK#|_<=o~y$w|S(Bnh{8!*h>w<|emm z?0(JwqbB!Iee=;BW>WB}bNHxTtSC-P@HfHVKKz{scFUOffH>_B=XR&nE7>wv*~I}k z1x0H6J40N-qt}e!a5}pbsijD5d1Pzm2)fj=R!US}dydrp<*c%VZs~3lh>%m`!@-;y zmFyhXbtheDyIHxhcxX6?e|DNUh#>%FnH?i1cpaFxtS+Dn?Rkgq+kNQBHipRQr`z3l zUleaFH2f^iqh)MDLtQM~1^gi>HOyTDkOE^`>bED?ShuaaTNnwf6@pBB@?MQK zwAa)pailTA2jqPK|7rpi#z#=#CyMtZEil6x%uihknCjz{^b+gs^*9ijOqd|#x5SL# z^&=T8Gs5o@GXkZbjx+sqowp(JBY2-fi4`y-1X7mR5w^OWvSSYJAxfz?lzNl@mqF^y z6+iXnd<@b8ldzk}i{N!MIj5X`*Wd+|UxM6;ED-2IGz+qqB_2M!6(!qDVFhbY;L3_X zhh(-ABLXGc9GqC3peOKEH6`0nvJEBMbnP7Td!e!?2TalpNGWXGTQ;`wH3JBU5h0Bc z;fM3c>w9N_J)tZjj+tn>)U!E`Up|>&*f48^@@gor#&*gvNSMO{I++YS@%WMaKM{EZK zH;)r;-haJUjWnB4*AU-MVmrVMkVtia9}v$0VBfXnwX$W_%e@_!pm5fD0Jx4>ESg7n zIcxE9%W#W%cI2Qz;Yo4R-L%;m#&ZDapL5DAqx^f{=r~aFCNX ze?iIc=2e@}RZe5a0QAK^IugO|44&w54W5I(<#+@B{&(_!OG=KE+*p-dET;Mr>Kwn2 zwOGkr&xH!FOMc1Ym^gqD+D^Z&sm0LHy-y&0rwGGb%3ZH4 z!!`GH3-hsl;ZqoLWsT7>g$AZ(alm;LTEIwz|4_n^mr+-_wI=2Zwub3yn)-R1EFgj1 z6%H&hEhJ_T#c0w0)2uR!Kq*i4xw-f3&b7mn-TB=!l-=pAMau3>t-;$h)?2-G7_=p2 zeh$oyO3F-qL_wY?x05`W;hCR5pWUNF0?n);EH(sFi+U~VqyebNoPuFm9moU9QK}l! 
z06~b9ioJGk%8yY4{5WFZuc5X7p|JMXqjLOKRF3P-gzi%k0F{(yI3zU@NH;m9h6bJu z)SIwA)EVUzL(#My56epA=7fXWTBtKj^xAFjTC(Jc!qej@8Al-+$#Ih7qd+oal;i); zS!Eu98pYDh1yvSvmI6IMhlJW_}YAIM?x10vx6 z2PziZ*T4qvSO>=KgRXr~drd7dVAASCTM0&G?<~ibTv4kp5i zk{MNQ-s0$KY$WQdI97q~L#xYRX8?o#ZGFeEUuR9%i-quKusMLU^V&7&F&x$>VCCCtyF7VyRt7=UqT_z8oLzO0nkETW0wNkg&_6WfpCMD0+dg}aR|hd z8Xz?=SPgs+8eS^OE1jkzGL|yRx!~Px({Ysxp1_aFapcN>&(&?$yD|$s#6$1#+U(O@ z1Dt1z*V6!+W0?&@TY>y0d?9OsL`Uf#C>~4V|I-rx4^y?y$ZktwsBo>i%5!9(3788M z3~fmJxQaT!^H{vYjCV&QB;k1_^&9&$#Pu7Tq{ts28;P31LGtua_Lq2n1ToelMt{#L zCqaONkj)%lnaTs;4K|W(K!XX5@%-RH$!>;13IhZ>TrnNz{$Vv12EW;YGRx??v`tHI z@w5M9h9$6~xh~Yff-?VJ@RsQVNOAoNCXw+ovd?f4e__+KSOaVrZpYPDYXW4NiyYsj ztd}0?e+6X&iEQ9~TX6rzz03)8Tio`l2w z3iTXHEtV0v0m+e*1a(}N4<4dWTdQ(AKe9d4S8zbW!sq{h)8o3fv%&r@<(MJ0!Tv(e zn4ZRcgUC%LTf@ zN&U{`QdFgfs8qPxjgLir5r+@@wWonQg$`m^$`$XTQ(7=PZmkNQS<`Goc5v4Q+qTi` zZ*bzn$>D_Zk`k_CWQ^)Xd;-UXwbHWnrU`)nF|wUh0Xnix!yB23&ZI%mag{H1z2_B^ zyoO`)QmoO>Z|i19>)BlxVtcvKw`~}vRtLECUa_?7Ust@Q+bsuySYCocM#gxIj9DlY zK_#D=-NvetGyWQ8WhOBg4o+^O)X7X;I+lkL6DB@csC{ZGzo49PTuh4x`v9Wx$11h_ z?~Tu<@DN0@DGp+jFtRCPcIu;vSqJ*qV2xQw7n4olxMWl4BllBHT2f?FP_oM?$u8rw zDKb%;VtRF)CdC@T6iBrRo(-84P}za0`$?(W`yQesn4&%!<)6p~l05wCI`i=34nDyY z1XFlbjs#Q4So+95N@95H(Mp;pX;S>3XXBBNo)r1WCWo=JDWMPPxPULD8&)J^ap4py>gW^;1xOxZ_KOuqQ zP!$&j#g9X%G5;UWDNkVmM_hLZ>8?DD#T;ps8;O-2?Y`wDSVc?s-~%~F;DA(i{0&i7 zk$M`3db(9*?w^SJv)YMn$ADO(4*!EjWg9D{pz+WC6?d$y^K7bcZ3pbv(W+G7LaI_} zmMI9$az((aeG8U2lqx+Kf1rN?-d$qSugr52umnp=_}{Ty<}bo-exI{1zJMAw7h(iX zE>nr9*^nBQI^B6-@mM?E{fIm;QgQpQ;wI3RQ$Vj$Sh{wT=l=-h%PKH=-Wop=6t#FM zZ=6FCdSZ)Gmb-A^BPekRrNWg|%3p#+SiNO9h0^^Vi)Iz|E~epWfh_X4!wZ8ptLD{~x@zhQ|fhg__r4UtbaX|s=l&wzn5~gY*GpmNe zcf)@*$zNcM2dwv&l+zj;$O6)o@x| z!76p2vPV*SkbB!SEjA4A96%m3HAO6)7YdA8cDLVi;E|XX6ii7z!#vYr?72br zKr-)Tt`@Tw;>xfC{9RgI`K&VYrlEHt&DNN*WS_U;BL!+q-|QkKO}{NXwJXXAM);L- z*9Xe8%IUA@wyAd-M)p$THv`8tc5%BC>Eafs+w8N-DI6E*DoUTqlI#l zBc^O1%;QuGXri>c_rW|yLVhMBSi322U;yw28MH%MRu|Z5H7KNovf`8POyw7q zC%_3cJ9^y$3kMDZb%WX9pFN?RV6s8f%gXs$Az+NR9A)*mtvQQuOXGz=u(k zQ1tFO)CvPcfA)k6W!bAzlx|&UhOtKJ{x4!5I;{GH*QGLi^#!!jk$f(~1IMFO#k&ue zazI4a(YV-!=4Ms+fAgbI-UtQN5&}|ey2!;iXfB2XGRgi#qTnA8RQuPn$|a9XZl3Qd zts=yAHcA$HQwZ@<$wL1%+5d-CUUT!7$?GRQc^x4O1=NIiLOo>ZW8w+zw8|s9SVu6M zB2lXU;@){eg$N&kCsbTH8c*ndCQF?p|L%wZLjjQ}|J^7I(L@mn7OrJ>j2uyf0we47 z-6vlhD2mXZj*ZUi#&|1P|A;6;N$x+Y+<$C}Scx5!n7kf|07c5eU!%Hcs*4__E_!^l z8wu>7KN+7nF;K)&9GHlaYKV6zHDIJljD?Y4LAUs0VqX{JZiOvD5_<~C%J^218S zYY3`^g-*98*m`j5b0Gp`J;58QFx*SR*dh1n5)+W+>XV?*MAE;%8D|2$j36v-9hV!$M$oVPs zLptZDm+DNMpVV!z>6G8YK7#|g4HD-kaegMnibu|sp)y85qo&I;m7+&V3 zyX5?=aB9p{&QJ0A9yve7RRcLc#W_Ihm&o}k)a?My&(P4)IX^vja3JR=KWP}}XE0!y z^ON6!I6natWx}9=ud97@j^>S~=#kWp+4QCSV z8gYL5ttE=rbh`!ArPr=;C$pqZ0CymRh8r}8hx1ct!4%HV3gaFn&QFN7Ci#z~5l1OO zO6h??oS)zTP(+R*a*qe+XIS)e&d*!%0A1DVsWjFA9dUjh+M*5O{B&(+g9Uylo1U`i zU1$`A+|CmSswHuLdg409`RTP+CC<-1X9XN~`+xsR_Br;($!U3$OxCX@&I#>o={=+P zuyl9jBekFEdyE{fNrIRvm8!^PXJ4+qghULC+rPjL1Z`A_^AlPD5$7i?4+v}J7*qyy zusP`Q(lF{E>X(geeC=YdmjMm(M&SGuB76kSPjMx2ekR3zk8mvHv6lSA`AM9g!{bFs zv5KS0`RQd+kEc?VXf+NkLbHEzpJ+9RR)c6Yh*o1yoZW}K1Z#Y>8VQ`AE+dv zIZ@zAo6ubUAF7=vW(!<`NO4>VtN{oInJ9E(hDu)j;WZcpR!%1zUEaXn}Ess zOVg<~>!XHHCHCzqFL!{-XN?U@7Q(^%F@}T*Gn0mEud73HLnDmwiDs(b{Jn7gXJ9F0&3-MYFB94D^B9oX^xT+_Kqj zX}V>$I$8sG4eNS&1^=rpf34Soyh|i9Z1wtZ&8xcQ7~tuUADvjI7;XAh*}qm?#QI6B zpKR;<&YAie?tR;E`!+dsobzD!6bKV=>L$(l=@*?yte-+*N=1cUaCxjng~a+vZq}o^ zS$k7v6JXyE=X+%R6ju#o{S@b^J8(v3?TkC#<_w?y(ar zTAonA^bsq46AhZf!}=+-U<&JJl~_N^N4RKbU;SZX{RHThSU-vN(p0vB6J?WI@Sms z(S9D<2p!RW`lT5ZRimid1fyz+w4WY8_k?mB0%qAcUVpPLqGUw-Nwl9vI9GEY(*xIu z_VbY_Gg(52wq;m7!wzNJ61bEve=x4Z%cN-DB*3K+Xg`GrAA$B$TuHQ_NrB%Z4E^k1 
zf%OQIpJ+dc_LFEoSxNlvN>y))nEK*)N>z#W^UxwR`zQB_r;B*H$`U(BrNWXTwKzS9 z`Xb9^Hvor*FbQ^p2ve98?^Zrq$`#CxTdP8jd(&($A@zO(K=59F<9!?1uqKy8DW&1B zQT6kLg87F$dvb!r8ntoqbPBn$yh3k6 zCy4fwXg|B*2;N$i=Z8n!U84O2LeL`1UMQ*G*+=eDLMNPYr0G+%nsM|(5a+FK$0(K5 z2jegBoj|mDa_9t@6MLckj0LSF8cz}JkdCL&fqM4GE@LS-M84E`tQDgDq*%(M^3gm# z;0FQU1bmN^_A|Gg+c(0u}W}uR7L+5}8CX`FH`Ve zz}ojLC<6m#x8MyG8B&pBpfRZhKW z=pAGhcrwZ*6B=4N?WgAs4y66$Ck>L!<#WIX2qq z+HH;bYia?}#CG>SP!wn&1x&pIl{Fo`ZEWD5+~;!zJPdt=cT&OWk(DVMLCj*QuCH*d$Kn*Z)u035=fX5@MsK zfIIyiBz)O*%x*uUgGBr3_0CRGKI>~$UTcK=f;X|_YKQF;U*UY5zmxw$PB8K?B~&bI z7fR4t$x}fe=ilzTa1Okuh#W=ad|z5tD~ZZT@~+21`xzGfg7)*46ro!c8IWU*&=KwD zp^eZH?I+QGy4dZ(F{wN+08J&EdY6(v$iTnGF9AFwJ@Q z@-Sq=Te-3J<>~?=5nkRN=N@@?*PapUg|313*eoA`^gyEhWM1H-fn!AbdC&@}67BgB zXg`GrAA$B$TuHQ_MEe<1WBZsGK8oZg+E1eW94%P!h&0V-m6L$Z*Z}4qUzy5(Mwx@2 zy|#hTGnNyL2PIn`jC7;Z#498to!RdOzuAH^%dAFi)6!e~?4M?^5cRz+y=RF2EDOqf z=yNRc3NaTymiEH;B=yuB?b7CpSlag<( z@sPQujY6g}IdkIX>NsgXy$IxZh_ncuAar7v>8Ghd*2jlPi_8f!Cv4r`DZjJBGG}Vb zYh}yaO|m|j6J$;RfmArG_ZXWKd!hY|1+C-so?f&=I-WuYy5@z?A=>g4-Rg^IOZFMw z9r?m$xWkaP=CKNl$66uUPqJ1HkF`RypG5mPil@4$@a)sf?Do&S|E9O+wqM$QY2Ro+ zJ;6RA?WgC&6746^eiH4c0I>GPl(uhdnMC`Unuks)$jG`Ia+(f#$T=|uS*||`N=^1k z`{@;(NVK0qVM;}XUas0$iwcSMliaLFb+h)S%qEZ#lZNQkZt~txoN1SnSF=*|0zU$* z)G6#I9~YqX95|uxLhboSP?F9_DL2^P$&t8FoytM0Q<0LIWd)I_M*^)O_U_-Y&DVgQ|O0u+E1@4ooGLa_H!4spSO6Y`PU$qKc&^e-BfFN zKy7F}nN+KKiRZmNfsPH=B!2^I$Xek>HHOC0qJg$r%tz8T^lI1+HmRVSMI!X`;C|1cpq+Y%5iP8 z$*OOTtsKi|l+)r0R(u1+I+D^`P|h?>iw(m&Co4u$H(VXaxTfI0cqKm647KcTzlSXo zm=+Wj;cT2ZMJ$z+J$L4K{@)&nl7wlI zA5HqH;KAMlN-sl$;jst%52^|^W-UNZBK#a*FXftM8;M11cLU5Cd;_O#R3~Rl z$j)4)#(CqSL*$)^xPSR^0m~)9?eqsryl2FF=JTE<8WXa|Il#I6wX*}XXeEX0a#e%M zzb(ONHiNHaJ^5dV>~e<`&@HS_{e_JfGuwEAXqnS*`HxDJc> z!BQ?tw2U7B)ABD(uIo7)fwtN2fc4$&Ks`stuyr@mmA<+ry3)f*>TP_6sD1*~#DGX3 zh{b<>9Fmh_ZYPsq0xvV^1JVb+K9EI!qK1%sn3z{6d1n9Y9OG9&H%IU*TtU}s(O>8q zVnSiHkiE+Ntjmsm-++>gO~clJ1I)0>4s(`Pg!VYW-o`z_#8Lk?G|eOWWM1wfpfS## zhQcJ*(`~)&Xpn(vFzJ`qG;v7FtIvC|-18_#nP|EWzuonl>o0gZs)`aP7yMcV64tX{ zOr62|tKanQQH~4sk>Q^QHd?+-;K&w(3I~(9X$<8&?VhD+Y7qw^`iy$lp};L(!#XHz zChAp0-5>uz6LG2vpE7@hI6!ZN$s_y5_(x`TLvvQC-r}qji9h2yjt`m+EafW8LPh3- z6C=Cx<8S7dIW$o;p*L%P{Nok@x=2^0N@cam8!0fM5b94*U30=E_;@%#1oqawBT6e|H$7W4eN|@ywSB--`IagPL=Oe zkqSiBnXG7zunMfL3Kinpogqu6#>5#11lcasx;>}ZhBhNw&+fLsf?CS)zHS8{9K#znx}b&} zu&+5MM)2vha#>jb2%~8tjsX2|>Q=XLFZleJ@~m=E`kcF0;j=!YU`imw%)?-)T0-wtQSMli$Z06|vp$uvaf&NSQxL>tXkCpdl# zkF#hQcF_tShY4#M%o@d3K==s3r_;(6B^&!xyI%>P0FT`?dhXT(6XTxJtQ3^R_{Yi5 znpI9Tdrm7fJ<|Z4&KdAqX-(i4#13D<_a0MVK8@|3-U(fBOvxz6jh)bG)5^SZY_k!* z4(2gjByi@&n&VBsObTHey_rslvLZUrAqD+|iZmL5KI45x$*jP*N<{F3Yo zI8qk%;P6 z{iE(zgHNZGobnk6szVnv_-mWn2}=}yJNT^Kk4ziSK$>dyhs~^hy`LXsu@9*L literal 0 HcmV?d00001 diff --git a/modules/image/Image_gan/gan/stgan_bald/processor.py b/modules/image/Image_gan/gan/stgan_bald/processor.py new file mode 100644 index 00000000..7846df26 --- /dev/null +++ b/modules/image/Image_gan/gan/stgan_bald/processor.py @@ -0,0 +1,81 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +from PIL import Image +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_path, output_dir, visualization, thresh=120): + """ + Postprocess output of network. one image at a time. 
+ + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + thresh (float): threshold. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for i, img in enumerate(data_out): + + img = np.squeeze(img[0].as_ndarray(), 0).transpose((1, 2, 0)) + img = ((img + 1) * 127.5).astype(np.uint8) + img = cv2.resize(img, (256, 341), cv2.INTER_CUBIC) + fake_image = Image.fromarray(img) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im_path, output_dir, i) + img_name = '{}.png'.format(i) + fake_image.save(os.path.join(output_dir, img_name)) + + result['data_{}'.format(i)] = img + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im_path, output_dir, num): + """ + Get save image name from source image path. + """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + str(num) + ext) + + return save_im_path diff --git a/modules/image/Image_gan/gan/stgan_bald/requirements.txt b/modules/image/Image_gan/gan/stgan_bald/requirements.txt new file mode 100644 index 00000000..2d8443d0 --- /dev/null +++ b/modules/image/Image_gan/gan/stgan_bald/requirements.txt @@ -0,0 +1,2 @@ +paddlepaddle>=1.8.4 +paddlehub>=1.8.0 diff --git a/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md b/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md new file mode 100644 index 00000000..d2f07a18 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md @@ -0,0 +1,122 @@ +## 模型概述 +UGATIT 图像风格转换模型 + +模型可将输入的人脸图像转换成动漫风格 + +模型权重来自UGATIT-Paddle开源项目 + +模型所使用的权重为genA2B_1000000 + +模型详情请参考[UGATIT-Paddle开源项目](https://github.com/miraiwk/UGATIT-paddle) + +## 模型安装 + +```shell +$hub install UGATIT_100w +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False +) +``` + +风格转换API,将输入的人脸图像转换成动漫风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/d130fabd8bd34e53b2f942b3766eb6bbd3c19c0676d04abfbd5cc4b83b66f8b6) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/8538af03b3f14b1884fcf4eec48965baf939e35a783d40129085102057438c77) + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='UGATIT_100w', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub 
serving start -m UGATIT_100w +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/UGATIT_100w" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/miraiwk/UGATIT-paddle + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/UGATIT_100w/model.py b/modules/image/Image_gan/style_transfer/UGATIT_100w/model.py new file mode 100644 index 00000000..4c691d93 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_100w/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + # 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' 
% e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + inputs = input_data.copy() + self.input_tensor.copy_from_cpu(inputs) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/UGATIT_100w/module.py b/modules/image/Image_gan/style_transfer/UGATIT_100w/module.py new file mode 100644 index 00000000..f916c3f9 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_100w/module.py @@ -0,0 +1,57 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from UGATIT_100w.model import Model +from UGATIT_100w.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="UGATIT_100w", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="UGATIT_100w", # 模型介绍 + version="1.0.0" # 版本号 +) +class UGATIT_100w(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "UGATIT_100w") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False): + # 加载数据处理器 + processor = Processor(images, paths, output_dir, batch_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/UGATIT_100w/processor.py b/modules/image/Image_gan/style_transfer/UGATIT_100w/processor.py new file mode 100644 index 00000000..e3e879c6 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_100w/processor.py @@ -0,0 +1,119 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, output_dir='output', batch_size=1): + # 变量设置 + self.images = images + self.paths = paths + 
self.output_dir = output_dir + self.batch_size = batch_size + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 图像缩放 + img = cv2.resize(img, (256, 256)) + + # 归一化 + img = (img.astype('float32') / 255.0 - 0.5) / 0.5 + + # 转置 + img = img.transpose((2, 0, 1)) + + # 增加维度 + img = np.expand_dims(img, axis=0) + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 图像后处理 + img = (output * 0.5 + 0.5) * 255. + + # 限幅 + img = np.clip(img, 0, 255).astype(np.uint8) + + # 转置 + img = img.transpose((1, 2, 0)) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), img) + + results.append(img) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/UGATIT_83w/README.md b/modules/image/Image_gan/style_transfer/UGATIT_83w/README.md new file mode 100644 index 00000000..493b8eaf --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_83w/README.md @@ -0,0 +1,122 @@ +## 模型概述 +UGATIT 图像风格转换模型 + +模型可将输入的人脸图像转换成动漫风格 + +模型权重来自UGATIT-Paddle开源项目 + +模型所使用的权重为genA2B_0835000 + +模型详情请参考[UGATIT-Paddle开源项目](https://github.com/miraiwk/UGATIT-paddle) + +## 模型安装 + +```shell +$hub install UGATIT_83w +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False +) +``` + +风格转换API,将输入的人脸图像转换成动漫风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/d130fabd8bd34e53b2f942b3766eb6bbd3c19c0676d04abfbd5cc4b83b66f8b6) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/78653331ee2d472b81ff5bbccd6a904a80d2c5208f9c42c789b4f09a1ef46332) + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module('UGATIT_83w', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m UGATIT_w83 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import 
json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/UGATIT_w83" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/miraiwk/UGATIT-paddle + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/UGATIT_83w/model.py b/modules/image/Image_gan/style_transfer/UGATIT_83w/model.py new file mode 100644 index 00000000..4c691d93 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_83w/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + # 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + inputs = input_data.copy() + self.input_tensor.copy_from_cpu(inputs) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/UGATIT_83w/module.py b/modules/image/Image_gan/style_transfer/UGATIT_83w/module.py new file mode 100644 index 00000000..50fbf560 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_83w/module.py @@ -0,0 +1,57 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from UGATIT_83w.model import Model +from UGATIT_83w.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="UGATIT_83w", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="UGATIT", # 模型介绍 + version="1.0.0" # 版本号 +) +class UGATIT_83w(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "UGATIT_83w") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False): + # 加载数据处理器 + processor = Processor(images, paths, 
output_dir, batch_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/UGATIT_83w/processor.py b/modules/image/Image_gan/style_transfer/UGATIT_83w/processor.py new file mode 100644 index 00000000..e3e879c6 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_83w/processor.py @@ -0,0 +1,119 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, output_dir='output', batch_size=1): + # 变量设置 + self.images = images + self.paths = paths + self.output_dir = output_dir + self.batch_size = batch_size + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 图像缩放 + img = cv2.resize(img, (256, 256)) + + # 归一化 + img = (img.astype('float32') / 255.0 - 0.5) / 0.5 + + # 转置 + img = img.transpose((2, 0, 1)) + + # 增加维度 + img = np.expand_dims(img, axis=0) + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 图像后处理 + img = (output * 0.5 + 0.5) * 255. 
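+            # Note: the line above is the inverse of the (img / 255.0 - 0.5) / 0.5
+            # normalization applied in preprocess(), mapping the network output from
+            # [-1, 1] back to the [0, 255] pixel range before the clipping below.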
+ + # 限幅 + img = np.clip(img, 0, 255).astype(np.uint8) + + # 转置 + img = img.transpose((1, 2, 0)) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), img) + + results.append(img) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/UGATIT_92w/README.md b/modules/image/Image_gan/style_transfer/UGATIT_92w/README.md new file mode 100644 index 00000000..084188af --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_92w/README.md @@ -0,0 +1,122 @@ +## 模型概述 +UGATIT 图像风格转换模型 + +模型可将输入的人脸图像转换成动漫风格 + +模型权重来自UGATIT-Paddle开源项目 + +模型所使用的权重为genA2B_0924000 + +模型详情请参考[UGATIT-Paddle开源项目](https://github.com/miraiwk/UGATIT-paddle) + +## 模型安装 + +```shell +$hub install UGATIT_92w +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False +) +``` + +风格转换API,将输入的人脸图像转换成动漫风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/d130fabd8bd34e53b2f942b3766eb6bbd3c19c0676d04abfbd5cc4b83b66f8b6) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/b7305162ff6345e9b04507a196ebe854907b446936934844be8aae4b0297db18) + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='UGATIT_92w', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m UGATIT_92w +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/UGATIT_92w" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/miraiwk/UGATIT-paddle + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/UGATIT_92w/model.py b/modules/image/Image_gan/style_transfer/UGATIT_92w/model.py new file mode 100644 index 00000000..4c691d93 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_92w/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + # 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = 
self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + inputs = input_data.copy() + self.input_tensor.copy_from_cpu(inputs) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/UGATIT_92w/module.py b/modules/image/Image_gan/style_transfer/UGATIT_92w/module.py new file mode 100644 index 00000000..8271307c --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_92w/module.py @@ -0,0 +1,57 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from UGATIT_92w.model import Model +from UGATIT_92w.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="UGATIT_92w", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="UGATIT_92w", # 模型介绍 + version="1.0.0" # 版本号 +) +class UGATIT_92w(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "UGATIT_92w") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False): + # 加载数据处理器 + processor = Processor(images, paths, output_dir, batch_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/UGATIT_92w/processor.py b/modules/image/Image_gan/style_transfer/UGATIT_92w/processor.py new file mode 100644 index 00000000..e3e879c6 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_92w/processor.py @@ -0,0 +1,119 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = 
np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, output_dir='output', batch_size=1): + # 变量设置 + self.images = images + self.paths = paths + self.output_dir = output_dir + self.batch_size = batch_size + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 图像缩放 + img = cv2.resize(img, (256, 256)) + + # 归一化 + img = (img.astype('float32') / 255.0 - 0.5) / 0.5 + + # 转置 + img = img.transpose((2, 0, 1)) + + # 增加维度 + img = np.expand_dims(img, axis=0) + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 图像后处理 + img = (output * 0.5 + 0.5) * 255. + + # 限幅 + img = np.clip(img, 0, 255).astype(np.uint8) + + # 转置 + img = img.transpose((1, 2, 0)) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), img) + + results.append(img) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md new file mode 100644 index 00000000..ac03c3bc --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V1 图像风格转换模型 + +模型可将输入的图像转换成Hayao风格 + +模型权重转换自AnimeGAN V1官方开源项目 + +模型所使用的权重为Hayao-60.ckpt + +模型详情请参考[AnimeGAN V1 开源项目](https://github.com/TachibanaYoshino/AnimeGAN) + +## 模型安装 + +```shell +$hub install animegan_v1_hayao_60 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/10175bb964e94ce18608a84b0ab6ebfe154b523df42f44a3a851b2d91dd17a63) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 
模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v1_hayao_60', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v1_hayao_60 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v1_hayao_60" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGAN + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + # 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' 
% e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py new file mode 100644 index 00000000..39ac4d5d --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v1_hayao_60.model import Model +from animegan_v1_hayao_60.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v1_hayao_60", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v1_hayao_60", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V1_Hayao_60(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v1_hayao_60") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py @@ -0,0 +1,132 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + 
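+# Minimal round-trip sketch for the two helpers above (illustrative only; the
+# image path is a placeholder):
+#     img = cv2.imread('/PATH/TO/IMAGE')    # BGR ndarray, shape [H, W, C]
+#     b64 = cv2_to_base64(img)              # JPEG-encode, then base64-encode to str
+#     restored = base64_to_cv2(b64)         # decode back to a BGR ndarray
+# The JPEG step is lossy, so `restored` is close to, but not bit-identical to, `img`.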
+ +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md new file mode 100644 index 00000000..1aa4c086 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Hayao风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Hayao-64.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_hayao_64 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/49620341f1fe4f00af4d93c22694897a1ae578a235844a1db1bbb4bd37bf750b) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_hayao_64', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_hayao_64 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_hayao_64" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + # 初始化函数 + def 
__init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py new file mode 100644 index 00000000..142a9599 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_hayao_64.model import Model +from animegan_v2_hayao_64.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_hayao_64", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_hayao_64", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Hayao_64(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_hayao_64") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py @@ -0,0 +1,132 @@ 
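+# Descriptive note on the pipeline implemented below: preprocess() scales each
+# image so its longer side is at most max_size and its shorter side is at least
+# min_size, crops height and width down to the nearest multiple of 32 (likely to
+# match the generator's downsampling stride), and maps pixels to [-1, 1] via
+# img / 127.5 - 1.0. For example, a hypothetical 700x1000 (HxW) input is left
+# unscaled under the default limits and cropped to 672x992 before batching;
+# postprocess() then inverts the normalization.
+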
+import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md new file mode 100644 index 00000000..be08aeee --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Hayao风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Hayao-99.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_hayao_99 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/16195e03d7e0412d990349587c587a26d9ae9e2ed1ec4fa1b4dc994e948d1f7d) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_hayao_99', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_hayao_99 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_hayao_99" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + # 初始化函数 + def 
__init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py new file mode 100644 index 00000000..7e724833 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_hayao_99.model import Model +from animegan_v2_hayao_99.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_hayao_99", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_hayao_99", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Hayao_99(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_hayao_99") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py @@ -0,0 +1,132 @@ 
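+# Batching note: preprocess() below concatenates all inputs and splits them into
+# ceil(len(datas) / batch_size) chunks with np.array_split; the conditional used
+# to compute split_num is simply an integer ceiling division.
+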
+import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md new file mode 100644 index 00000000..7bd3f2e5 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Paprika风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Paprika-54.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_paprika_54 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/08ee95c94e0b4d4e8b2855a6ed40af5853b40c0047b3421aaa2f7c877fac5130) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_paprika_54', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_paprika_54 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_54" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + 
# 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py new file mode 100644 index 00000000..f3e02e0d --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_paprika_54.model import Model +from animegan_v2_paprika_54.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_paprika_54", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_paprika_54", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Paprika_54(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_paprika_54") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ 
b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py @@ -0,0 +1,132 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md new file mode 100644 index 00000000..9cd3941a --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Paprika风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Paprika-74.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_paprika_74 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/6574669d87b24bab9627c6e33896528b4a0bf5af1cd84ca29655d68719f2d551) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_paprika_74', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_paprika_74 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_74" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + 
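In `Processor.postprocess` above, `(output.squeeze() + 1.) / 2 * 255` undoes the `img / 127.5 - 1.0` scaling applied in `preprocess`, and `np.clip` keeps out-of-range network outputs inside the 8-bit range. A standalone numpy sketch of that round trip, not part of the module, just a check of the arithmetic:

```python
import numpy as np

# A toy 8-bit patch standing in for an image row.
pixels = np.array([[0, 64, 127, 128, 255]], dtype=np.uint8)

# Forward scaling as in Processor.preprocess: map [0, 255] to [-1, 1].
normalized = pixels / 127.5 - 1.0

# Inverse scaling as in Processor.postprocess (before the final uint8 cast).
restored = np.clip((normalized + 1.) / 2 * 255, 0, 255)

# The two transforms invert each other up to floating-point rounding.
assert np.allclose(restored, pixels)
```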
# 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py new file mode 100644 index 00000000..1081be3a --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_paprika_74.model import Model +from animegan_v2_paprika_74.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_paprika_74", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_paprika_74", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Paprika_74(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_paprika_74") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ 
b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py @@ -0,0 +1,132 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md new file mode 100644 index 00000000..eb1ef3a8 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Paprika风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Paprika-97.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_paprika_97 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/3b962a18a22e43028cc5530db1c5adb1a42e6aae4bb74b8598ee30ed52b59c8b) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_paprika_97', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_paprika_97 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_97" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + 
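`Processor.preprocess` above resizes so that the longer edge never exceeds `max_size` and the shorter edge is at least `min_size`, keeping the aspect ratio, and then crops height and width down to multiples of 32 before normalisation. A small sketch of that sizing rule; `plan_input_size` is an illustrative helper name, not part of the module:

```python
def plan_input_size(h, w, min_size=32, max_size=1024):
    # Sizing rule from Processor.preprocess: cap the longer edge at max_size,
    # or raise the shorter edge to min_size, keeping the aspect ratio.
    if max(h, w) > max_size:
        if h < w:
            h, w = int(h / w * max_size), max_size
        else:
            h, w = max_size, int(w / h * max_size)
    elif min(h, w) < min_size:
        if h > w:
            h, w = int(h / w * min_size), min_size
        else:
            h, w = min_size, int(w / h * min_size)
    # Both sides are then cropped down to a multiple of 32, as in the code above.
    return h - h % 32, w - w % 32


# A 4000 x 6000 photo is first resized to 682 x 1024, then cropped to 672 x 1024.
print(plan_input_size(4000, 6000))  # (672, 1024)
```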
# 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py new file mode 100644 index 00000000..c059338d --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_paprika_97.model import Model +from animegan_v2_paprika_97.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_paprika_97", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_paprika_97", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Paprika_97(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_paprika_97") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ 
b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py @@ -0,0 +1,132 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md new file mode 100644 index 00000000..ee3501eb --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Paprika风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Paprika-98.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_paprika_98 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/495436a627ef423ab572536c5f2ba6d0eb99b1ce098947a5ac02af36e7eb85f7) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_paprika_98', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_paprika_98 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_98" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + 
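The batch splitting in `Processor.preprocess` uses ceil division to decide how many batches `np.array_split` should produce. Note that `np.array_split` balances the pieces, so trailing batches can be slightly smaller than `batch_size`. A sketch of that behaviour; `split_into_batches` is an illustrative helper, not part of the module:

```python
import numpy as np


def split_into_batches(n_items, batch_size):
    # Ceil division as in Processor.preprocess: one extra batch whenever the
    # item count is not an exact multiple of batch_size.
    split_num = n_items // batch_size + 1 if n_items % batch_size != 0 else n_items // batch_size
    return np.array_split(np.arange(n_items), split_num)


# 7 items with batch_size=3 become 3 batches; np.array_split balances them
# as 3 + 2 + 2 rather than 3 + 3 + 1.
print([len(batch) for batch in split_into_batches(7, 3)])  # [3, 2, 2]
```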
# 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py new file mode 100644 index 00000000..2c549e62 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_paprika_98.model import Model +from animegan_v2_paprika_98.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_paprika_98", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_paprika_98", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Paprika_98(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_paprika_98") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ 
b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py @@ -0,0 +1,132 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md new file mode 100644 index 00000000..024ee3e5 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Shinkai风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Shinkai-33.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_shinkai_33 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/776a84a0d97c452bbbe479592fbb8f5c6fe9c45f3b7e41fd8b7da80bf52ee668) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_shinkai_33', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_shinkai_33 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_shinkai_33" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + 
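In `Model.load_model` further on in this patch, GPU inference is only attempted when `CUDA_VISIBLE_DEVICES` can be parsed, and anything else silently falls back to CPU. A sketch of that selection logic; `resolve_device` is an illustrative helper, and it reads the first comma-separated entry of the variable, which matches the original's first-character parse for single-digit device ids:

```python
import os


def resolve_device(use_gpu):
    # Selection logic as in Model.load_model: use the first id listed in
    # CUDA_VISIBLE_DEVICES, otherwise fall back to CPU inference.
    if not use_gpu:
        return 'cpu', None
    try:
        gpu_id = int(os.environ["CUDA_VISIBLE_DEVICES"].split(',')[0])
        return 'gpu', gpu_id
    except (KeyError, ValueError):
        print('CUDA_VISIBLE_DEVICES is not set or unparsable; falling back to CPU.')
        return 'cpu', None


os.environ["CUDA_VISIBLE_DEVICES"] = "0"
print(resolve_device(True))   # ('gpu', 0)
os.environ.pop("CUDA_VISIBLE_DEVICES")
print(resolve_device(True))   # ('cpu', None)
```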
# 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py new file mode 100644 index 00000000..674e576d --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_shinkai_33.model import Model +from animegan_v2_shinkai_33.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_shinkai_33", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_shinkai_33", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Shinkai_33(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_shinkai_33") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ 
b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py @@ -0,0 +1,132 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md new file mode 100644 index 00000000..3be87720 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md @@ -0,0 +1,127 @@ +## 模型概述 +AnimeGAN V2 图像风格转换模型 + +模型可将输入的图像转换成Shinkai风格 + +模型权重转换自AnimeGAN V2官方开源项目 + +模型所使用的权重为Shinkai-53.ckpt + +模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) + +## 模型安装 + +```shell +$hub install animegan_v2_shinkai_53 +``` + + +## API 说明 + +```python +def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024 +) +``` + +风格转换API,将输入的图片转换为漫画风格。 + +转换效果图如下: + +![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) +![输出图像](https://ai-studio-static-online.cdn.bcebos.com/fa4ba157e73c48658c4c9c6b8b92f5c99231d1d19556472788b1e5dd58d5d6cc) + + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; +* paths (list\[str\]): 图片的路径,默认为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output; +* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; +* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + +**返回** + +* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# 模型加载 +# use_gpu:是否使用GPU进行预测 +model = hub.Module(name='animegan_v2_shinkai_53', use_gpu=False) + +# 模型预测 +result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像风格转换服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m animegan_v2_shinkai_53 +``` + +这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/animegan_v2_shinkai_53" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/TachibanaYoshino/AnimeGANv2 + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py new file mode 100644 index 00000000..29d4f83b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py @@ -0,0 +1,68 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + 
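Beyond the basic prediction example in the README above, `visualization=True` also writes every result into `output_dir`, which `check_dir` creates on demand, using an `<index>_<timestamp>.jpg` naming pattern. A usage sketch; the input path and directory name are placeholders:

```python
import paddlehub as hub

# Load the module on CPU; with use_gpu=True, CUDA_VISIBLE_DEVICES must be set.
model = hub.Module(name='animegan_v2_shinkai_53', use_gpu=False)

# visualization=True also saves each result to output_dir as '<index>_<timestamp>.jpg'.
results = model.style_transfer(
    paths=['/PATH/TO/IMAGE'],
    visualization=True,
    output_dir='shinkai_output')

# One H x W x 3 ndarray per input image, as documented in the README above.
print(len(results), results[0].shape)
```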
# 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' % e) + use_gpu = False + + # 加载模型参数 + config = AnalysisConfig(modelpath) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.enable_memory_optim() + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py new file mode 100644 index 00000000..e192d282 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py @@ -0,0 +1,64 @@ +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from animegan_v2_shinkai_53.model import Model +from animegan_v2_shinkai_53.processor import base64_to_cv2, cv2_to_base64, Processor + + +@moduleinfo( + name="animegan_v2_shinkai_53", # 模型名称 + type="CV/style_transfer", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="animegan_v2_shinkai_53", # 模型介绍 + version="1.0.0" # 版本号 +) +class Animegan_V2_Shinkai_53(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "animegan_v2_shinkai_53") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def style_transfer(self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir, min_size, max_size) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + + # 图片风格转换 + results = self.style_transfer(images_decode, **kwargs) + + # 对输出图片进行编码 + encodes = [] + for result in results: + encode = cv2_to_base64(result) + encodes.append(encode) + + # 返回结果 + return encodes diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py new file mode 100644 index 00000000..b0e39154 --- /dev/null +++ 
b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py @@ -0,0 +1,132 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'cv2_to_base64', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # cv2转base64函数 + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output', min_size=32, max_size=1024): + # 变量设置 + self.min_size = min_size + self.max_size = max_size + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + # 格式转换 + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + # 缩放图片 + h, w = img.shape[:2] + if max(h, w) > self.max_size: + img = cv2.resize(img, (self.max_size, int(h / w * self.max_size))) if h < w else cv2.resize( + img, (int(w / h * self.max_size), self.max_size)) + elif min(h, w) < self.min_size: + img = cv2.resize(img, (self.min_size, int(h / w * self.min_size))) if h > w else cv2.resize( + img, (int(w / h * self.min_size), self.min_size)) + + # 裁剪图片 + h, w = img.shape[:2] + img = img[:h - (h % 32), :w - (w % 32), :] + + # 归一化 + img = img / 127.5 - 1.0 + + # 新建维度 + img = np.expand_dims(img, axis=0).astype('float32') + + # 加入输入数据列表 + input_datas.append(img) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + def postprocess(self, outputs, visualization): + results = [] + + for im_id, output in enumerate(outputs): + # 反归一化 + image = (output.squeeze() + 1.) 
/ 2 * 255 + + # 限幅 + image = np.clip(image, 0, 255).astype(np.uint8) + + # 格式转换 + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # 可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + + # 写入输出图片 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), image) + + results.append(image) + + # 返回结果 + return results diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py b/modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py index f349f554..99a67c0a 100644 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py @@ -5,169 +5,140 @@ import paddle.fluid as fluid def decoder_net(): - x2paddle_22 = fluid.layers.create_parameter(dtype='float32', - shape=[4], - name='x2paddle_22', - attr='x2paddle_22', - default_initializer=Constant(0.0)) - x2paddle_36 = fluid.layers.create_parameter(dtype='float32', - shape=[4], - name='x2paddle_36', - attr='x2paddle_36', - default_initializer=Constant(0.0)) - x2paddle_44 = fluid.layers.create_parameter(dtype='float32', - shape=[4], - name='x2paddle_44', - attr='x2paddle_44', - default_initializer=Constant(0.0)) - x2paddle_input_1 = fluid.layers.data(dtype='float32', - shape=[1, 512, 64, 64], - name='x2paddle_input_1', - append_batch_size=False) - x2paddle_19 = fluid.layers.pad2d(x2paddle_input_1, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_19') - x2paddle_20 = fluid.layers.conv2d(x2paddle_19, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_1', - name='x2paddle_20', - bias_attr='x2paddle_2') + x2paddle_22 = fluid.layers.create_parameter( + dtype='float32', shape=[4], name='x2paddle_22', attr='x2paddle_22', default_initializer=Constant(0.0)) + x2paddle_36 = fluid.layers.create_parameter( + dtype='float32', shape=[4], name='x2paddle_36', attr='x2paddle_36', default_initializer=Constant(0.0)) + x2paddle_44 = fluid.layers.create_parameter( + dtype='float32', shape=[4], name='x2paddle_44', attr='x2paddle_44', default_initializer=Constant(0.0)) + x2paddle_input_1 = fluid.layers.data( + dtype='float32', shape=[1, 512, 64, 64], name='x2paddle_input_1', append_batch_size=False) + x2paddle_19 = fluid.layers.pad2d( + x2paddle_input_1, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_19') + x2paddle_20 = fluid.layers.conv2d( + x2paddle_19, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_1', + name='x2paddle_20', + bias_attr='x2paddle_2') x2paddle_21 = fluid.layers.relu(x2paddle_20, name='x2paddle_21') x2paddle_23 = fluid.layers.resize_nearest(x2paddle_21, name='x2paddle_23', out_shape=[128, 128]) - x2paddle_24 = fluid.layers.pad2d(x2paddle_23, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_24') - x2paddle_25 = fluid.layers.conv2d(x2paddle_24, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_3', - name='x2paddle_25', - bias_attr='x2paddle_4') + x2paddle_24 = fluid.layers.pad2d( + x2paddle_23, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_24') + x2paddle_25 = fluid.layers.conv2d( + x2paddle_24, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_3', + 
name='x2paddle_25', + bias_attr='x2paddle_4') x2paddle_26 = fluid.layers.relu(x2paddle_25, name='x2paddle_26') - x2paddle_27 = fluid.layers.pad2d(x2paddle_26, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_27') - x2paddle_28 = fluid.layers.conv2d(x2paddle_27, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_5', - name='x2paddle_28', - bias_attr='x2paddle_6') + x2paddle_27 = fluid.layers.pad2d( + x2paddle_26, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_27') + x2paddle_28 = fluid.layers.conv2d( + x2paddle_27, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_5', + name='x2paddle_28', + bias_attr='x2paddle_6') x2paddle_29 = fluid.layers.relu(x2paddle_28, name='x2paddle_29') - x2paddle_30 = fluid.layers.pad2d(x2paddle_29, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_30') - x2paddle_31 = fluid.layers.conv2d(x2paddle_30, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_7', - name='x2paddle_31', - bias_attr='x2paddle_8') + x2paddle_30 = fluid.layers.pad2d( + x2paddle_29, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_30') + x2paddle_31 = fluid.layers.conv2d( + x2paddle_30, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_7', + name='x2paddle_31', + bias_attr='x2paddle_8') x2paddle_32 = fluid.layers.relu(x2paddle_31, name='x2paddle_32') - x2paddle_33 = fluid.layers.pad2d(x2paddle_32, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_33') - x2paddle_34 = fluid.layers.conv2d(x2paddle_33, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_9', - name='x2paddle_34', - bias_attr='x2paddle_10') + x2paddle_33 = fluid.layers.pad2d( + x2paddle_32, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_33') + x2paddle_34 = fluid.layers.conv2d( + x2paddle_33, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_9', + name='x2paddle_34', + bias_attr='x2paddle_10') x2paddle_35 = fluid.layers.relu(x2paddle_34, name='x2paddle_35') x2paddle_37 = fluid.layers.resize_nearest(x2paddle_35, name='x2paddle_37', out_shape=[256, 256]) - x2paddle_38 = fluid.layers.pad2d(x2paddle_37, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_38') - x2paddle_39 = fluid.layers.conv2d(x2paddle_38, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_11', - name='x2paddle_39', - bias_attr='x2paddle_12') + x2paddle_38 = fluid.layers.pad2d( + x2paddle_37, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_38') + x2paddle_39 = fluid.layers.conv2d( + x2paddle_38, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_11', + name='x2paddle_39', + bias_attr='x2paddle_12') x2paddle_40 = fluid.layers.relu(x2paddle_39, name='x2paddle_40') - x2paddle_41 = fluid.layers.pad2d(x2paddle_40, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_41') - x2paddle_42 = fluid.layers.conv2d(x2paddle_41, - num_filters=64, - 
filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_13', - name='x2paddle_42', - bias_attr='x2paddle_14') + x2paddle_41 = fluid.layers.pad2d( + x2paddle_40, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_41') + x2paddle_42 = fluid.layers.conv2d( + x2paddle_41, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_13', + name='x2paddle_42', + bias_attr='x2paddle_14') x2paddle_43 = fluid.layers.relu(x2paddle_42, name='x2paddle_43') x2paddle_45 = fluid.layers.resize_nearest(x2paddle_43, name='x2paddle_45', out_shape=[512, 512]) - x2paddle_46 = fluid.layers.pad2d(x2paddle_45, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_46') - x2paddle_47 = fluid.layers.conv2d(x2paddle_46, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_15', - name='x2paddle_47', - bias_attr='x2paddle_16') + x2paddle_46 = fluid.layers.pad2d( + x2paddle_45, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_46') + x2paddle_47 = fluid.layers.conv2d( + x2paddle_46, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_15', + name='x2paddle_47', + bias_attr='x2paddle_16') x2paddle_48 = fluid.layers.relu(x2paddle_47, name='x2paddle_48') - x2paddle_49 = fluid.layers.pad2d(x2paddle_48, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_49') - x2paddle_50 = fluid.layers.conv2d(x2paddle_49, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_17', - name='x2paddle_50', - bias_attr='x2paddle_18') + x2paddle_49 = fluid.layers.pad2d( + x2paddle_48, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_49') + x2paddle_50 = fluid.layers.conv2d( + x2paddle_49, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_17', + name='x2paddle_50', + bias_attr='x2paddle_18') return x2paddle_input_1, x2paddle_50 diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py b/modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py index f006b8cc..0bff785c 100644 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py @@ -6,182 +6,168 @@ import paddle.fluid as fluid def encoder_net(): x2paddle_0 = fluid.layers.data(dtype='float32', shape=[1, 3, 512, 512], name='x2paddle_0', append_batch_size=False) - x2paddle_21 = fluid.layers.conv2d(x2paddle_0, - num_filters=3, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_1', - name='x2paddle_21', - bias_attr='x2paddle_2') - x2paddle_22 = fluid.layers.pad2d(x2paddle_21, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_22') - x2paddle_23 = fluid.layers.conv2d(x2paddle_22, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_3', - name='x2paddle_23', - bias_attr='x2paddle_4') + x2paddle_21 = fluid.layers.conv2d( + x2paddle_0, + num_filters=3, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_1', + 
name='x2paddle_21', + bias_attr='x2paddle_2') + x2paddle_22 = fluid.layers.pad2d( + x2paddle_21, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_22') + x2paddle_23 = fluid.layers.conv2d( + x2paddle_22, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_3', + name='x2paddle_23', + bias_attr='x2paddle_4') x2paddle_24 = fluid.layers.relu(x2paddle_23, name='x2paddle_24') - x2paddle_25 = fluid.layers.pad2d(x2paddle_24, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_25') - x2paddle_26 = fluid.layers.conv2d(x2paddle_25, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_5', - name='x2paddle_26', - bias_attr='x2paddle_6') + x2paddle_25 = fluid.layers.pad2d( + x2paddle_24, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_25') + x2paddle_26 = fluid.layers.conv2d( + x2paddle_25, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_5', + name='x2paddle_26', + bias_attr='x2paddle_6') x2paddle_27 = fluid.layers.relu(x2paddle_26, name='x2paddle_27') - x2paddle_28 = fluid.layers.pool2d(x2paddle_27, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_28', - exclusive=False) - x2paddle_29 = fluid.layers.pad2d(x2paddle_28, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_29') - x2paddle_30 = fluid.layers.conv2d(x2paddle_29, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_7', - name='x2paddle_30', - bias_attr='x2paddle_8') + x2paddle_28 = fluid.layers.pool2d( + x2paddle_27, + pool_size=[2, 2], + pool_type='max', + pool_stride=[2, 2], + pool_padding=[0, 0], + ceil_mode=False, + name='x2paddle_28', + exclusive=False) + x2paddle_29 = fluid.layers.pad2d( + x2paddle_28, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_29') + x2paddle_30 = fluid.layers.conv2d( + x2paddle_29, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_7', + name='x2paddle_30', + bias_attr='x2paddle_8') x2paddle_31 = fluid.layers.relu(x2paddle_30, name='x2paddle_31') - x2paddle_32 = fluid.layers.pad2d(x2paddle_31, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_32') - x2paddle_33 = fluid.layers.conv2d(x2paddle_32, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_9', - name='x2paddle_33', - bias_attr='x2paddle_10') + x2paddle_32 = fluid.layers.pad2d( + x2paddle_31, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_32') + x2paddle_33 = fluid.layers.conv2d( + x2paddle_32, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_9', + name='x2paddle_33', + bias_attr='x2paddle_10') x2paddle_34 = fluid.layers.relu(x2paddle_33, name='x2paddle_34') - x2paddle_35 = fluid.layers.pool2d(x2paddle_34, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_35', - exclusive=False) - x2paddle_36 = fluid.layers.pad2d(x2paddle_35, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_36') - x2paddle_37 = 
fluid.layers.conv2d(x2paddle_36, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_11', - name='x2paddle_37', - bias_attr='x2paddle_12') + x2paddle_35 = fluid.layers.pool2d( + x2paddle_34, + pool_size=[2, 2], + pool_type='max', + pool_stride=[2, 2], + pool_padding=[0, 0], + ceil_mode=False, + name='x2paddle_35', + exclusive=False) + x2paddle_36 = fluid.layers.pad2d( + x2paddle_35, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_36') + x2paddle_37 = fluid.layers.conv2d( + x2paddle_36, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_11', + name='x2paddle_37', + bias_attr='x2paddle_12') x2paddle_38 = fluid.layers.relu(x2paddle_37, name='x2paddle_38') - x2paddle_39 = fluid.layers.pad2d(x2paddle_38, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_39') - x2paddle_40 = fluid.layers.conv2d(x2paddle_39, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_13', - name='x2paddle_40', - bias_attr='x2paddle_14') + x2paddle_39 = fluid.layers.pad2d( + x2paddle_38, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_39') + x2paddle_40 = fluid.layers.conv2d( + x2paddle_39, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_13', + name='x2paddle_40', + bias_attr='x2paddle_14') x2paddle_41 = fluid.layers.relu(x2paddle_40, name='x2paddle_41') - x2paddle_42 = fluid.layers.pad2d(x2paddle_41, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_42') - x2paddle_43 = fluid.layers.conv2d(x2paddle_42, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_15', - name='x2paddle_43', - bias_attr='x2paddle_16') + x2paddle_42 = fluid.layers.pad2d( + x2paddle_41, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_42') + x2paddle_43 = fluid.layers.conv2d( + x2paddle_42, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_15', + name='x2paddle_43', + bias_attr='x2paddle_16') x2paddle_44 = fluid.layers.relu(x2paddle_43, name='x2paddle_44') - x2paddle_45 = fluid.layers.pad2d(x2paddle_44, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_45') - x2paddle_46 = fluid.layers.conv2d(x2paddle_45, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_17', - name='x2paddle_46', - bias_attr='x2paddle_18') + x2paddle_45 = fluid.layers.pad2d( + x2paddle_44, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_45') + x2paddle_46 = fluid.layers.conv2d( + x2paddle_45, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_17', + name='x2paddle_46', + bias_attr='x2paddle_18') x2paddle_47 = fluid.layers.relu(x2paddle_46, name='x2paddle_47') - x2paddle_48 = fluid.layers.pool2d(x2paddle_47, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_48', - exclusive=False) - x2paddle_49 = fluid.layers.pad2d(x2paddle_48, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_49') - x2paddle_50 = 
fluid.layers.conv2d(x2paddle_49, - num_filters=512, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_19', - name='x2paddle_50', - bias_attr='x2paddle_20') + x2paddle_48 = fluid.layers.pool2d( + x2paddle_47, + pool_size=[2, 2], + pool_type='max', + pool_stride=[2, 2], + pool_padding=[0, 0], + ceil_mode=False, + name='x2paddle_48', + exclusive=False) + x2paddle_49 = fluid.layers.pad2d( + x2paddle_48, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_49') + x2paddle_50 = fluid.layers.conv2d( + x2paddle_49, + num_filters=512, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_19', + name='x2paddle_50', + bias_attr='x2paddle_20') x2paddle_51 = fluid.layers.relu(x2paddle_50, name='x2paddle_51') return x2paddle_0, x2paddle_51 diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py b/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py index 95ea138c..b6739014 100644 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py @@ -140,13 +140,14 @@ class StyleProjection(hub.Module): encode_program, encode_feeded_var_names, encode_target_vars = fluid.io.load_inference_model( dirname=self.pretrained_encoder_net, executor=exe) - fluid.io.save_inference_model(dirname=dirname, - main_program=encode_program, - executor=exe, - feeded_var_names=encode_feeded_var_names, - target_vars=encode_target_vars, - model_filename=model_filename, - params_filename=params_filename) + fluid.io.save_inference_model( + dirname=dirname, + main_program=encode_program, + executor=exe, + feeded_var_names=encode_feeded_var_names, + target_vars=encode_target_vars, + model_filename=model_filename, + params_filename=params_filename) def _save_decode_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: @@ -158,13 +159,14 @@ class StyleProjection(hub.Module): decode_program, decode_feeded_var_names, decode_target_vars = fluid.io.load_inference_model( dirname=self.pretrained_decoder_net, executor=exe) - fluid.io.save_inference_model(dirname=dirname, - main_program=decode_program, - executor=exe, - feeded_var_names=decode_feeded_var_names, - target_vars=decode_target_vars, - model_filename=model_filename, - params_filename=params_filename) + fluid.io.save_inference_model( + dirname=dirname, + main_program=decode_program, + executor=exe, + feeded_var_names=decode_feeded_var_names, + target_vars=decode_target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -184,10 +186,11 @@ class StyleProjection(hub.Module): """ Run as a command. """ - self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -199,29 +202,20 @@ class StyleProjection(hub.Module): paths = [{'content': args.content, 'styles': args.styles.split(',')}] else: paths = [{'content': args.content, 'styles': args.styles.split(','), 'weights': list(args.weights)}] - results = self.style_transfer(paths=paths, - alpha=args.alpha, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=True) + results = self.style_transfer( + paths=paths, alpha=args.alpha, use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=True) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument('--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument('--output_dir', - type=str, - default='transfer_result', - help="The directory to save output images.") - self.arg_config_group.add_argument('--visualization', - type=ast.literal_eval, - default=True, - help="whether to save output as images.") + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=True, help="whether to save output as images.") def add_module_input_arg(self): """ @@ -229,11 +223,7 @@ class StyleProjection(hub.Module): """ self.arg_input_group.add_argument('--content', type=str, help="path to content.") self.arg_input_group.add_argument('--styles', type=str, help="path to styles.") - self.arg_input_group.add_argument('--weights', - type=ast.literal_eval, - default=None, - help="interpolation weights of styles.") - self.arg_config_group.add_argument('--alpha', - type=ast.literal_eval, - default=1, - help="The parameter to control the tranform degree.") + self.arg_input_group.add_argument( + '--weights', type=ast.literal_eval, default=None, help="interpolation weights of styles.") + self.arg_config_group.add_argument( + '--alpha', type=ast.literal_eval, default=1, help="The parameter to control the tranform degree.") diff --git a/modules/image/keypoint_detection/hand_pose_localization/README.md b/modules/image/keypoint_detection/hand_pose_localization/README.md new file mode 100644 index 00000000..4e247d9a --- /dev/null +++ b/modules/image/keypoint_detection/hand_pose_localization/README.md @@ -0,0 +1,112 @@ +## 模型概述 +openpose 手部关键点检测模型 + +模型详情请参考[openpose开源项目](https://github.com/CMU-Perceptual-Computing-Lab/openpose) + +## 模型安装 + +```shell +$hub install hand_pose_localization +``` + +## API 说明 + +```python +def keypoint_detection( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False +) +``` + +预测API,识别出人体手部关键点。 + +![手部关键点](https://ai-studio-static-online.cdn.bcebos.com/97e1ae7c1e68477d85b37f53ee997fbc4ef0fc12c7634301bc08749bd003cac0) + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\], 默认设为 None; +* paths (list\[str\]): 图片的路径, 默认设为 None; +* batch\_size (int): batch 的大小,默认设为 1; +* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; +* output\_dir (str): 图片的保存路径,默认设为 output。 + +**返回** + +* res (list[list[list[int]]]): 每张图片识别到的21个手部关键点组成的列表,每个关键点的格式为[x, y],若有关键点未识别到则为None + + +## 预测代码示例 + +```python +import cv2 +import paddlehub as hub + +# use_gpu:是否使用GPU进行预测 +model = 
hub.Module(name='hand_pose_localization', use_gpu=False) + +# 调用关键点检测API +result = model.keypoint_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + +# or +# result = model.keypoint_detection(paths=['/PATH/TO/IMAGE']) + +# 打印预测结果 +print(result) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个在线人体手部关键点检测服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m hand_pose_localization +``` + +这样就完成了一个人体手部关键点检测的在线服务API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +# 图片Base64编码函数 +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/hand_pose_localization" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(r.json()["results"]) +``` + + +## 模型相关信息 + +### 模型代码 + +https://github.com/CMU-Perceptual-Computing-Lab/openpose + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.8.0 diff --git a/modules/image/keypoint_detection/hand_pose_localization/model.py b/modules/image/keypoint_detection/hand_pose_localization/model.py new file mode 100644 index 00000000..05c40ac7 --- /dev/null +++ b/modules/image/keypoint_detection/hand_pose_localization/model.py @@ -0,0 +1,71 @@ +import os +import numpy as np + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor + +__all__ = ['Model'] + + +class Model(): + # 初始化函数 + def __init__(self, modelpath, use_gpu): + # 加载模型预测器 + self.predictor = self.load_model(modelpath, use_gpu) + + # 获取模型的输入输出 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() + self.input_tensor = self.predictor.get_input_tensor(self.input_names[0]) + self.output_tensor = self.predictor.get_output_tensor(self.output_names[0]) + + # 模型加载函数 + def load_model(self, modelpath, use_gpu): + # 对运行位置进行配置 + if use_gpu: + try: + places = os.environ["CUDA_VISIBLE_DEVICES"] + places = int(places[0]) + except Exception as e: + print('Error: %s. Please set the environment variables "CUDA_VISIBLE_DEVICES".' 
% e) + use_gpu = False + + # 预训练模型路径 + model = os.path.join(modelpath, "__model__") + params = os.path.join(modelpath, "__params__") + + # 加载模型参数 + config = AnalysisConfig(model, params) + + # 设置参数 + if use_gpu: + config.enable_use_gpu(100, places) + else: + config.disable_gpu() + config.enable_mkldnn() + config.disable_glog_info() + config.switch_ir_optim(True) + config.switch_use_feed_fetch_ops(False) + config.switch_specify_input_names(True) + + # 通过参数加载模型预测器 + predictor = create_paddle_predictor(config) + + # 返回预测器 + return predictor + + # 模型预测函数 + def predict(self, input_datas): + outputs = [] + + # 遍历输入数据进行预测 + for input_data in input_datas: + self.input_tensor.copy_from_cpu(input_data) + self.predictor.zero_copy_run() + output = self.output_tensor.copy_to_cpu() + outputs.append(output) + + # 预测结果合并 + outputs = np.concatenate(outputs, 0) + + # 返回预测结果 + return outputs diff --git a/modules/image/keypoint_detection/hand_pose_localization/module.py b/modules/image/keypoint_detection/hand_pose_localization/module.py new file mode 100644 index 00000000..26ff7f29 --- /dev/null +++ b/modules/image/keypoint_detection/hand_pose_localization/module.py @@ -0,0 +1,50 @@ +# coding=utf-8 +import os + +from paddlehub import Module +from paddlehub.module.module import moduleinfo, serving + +from hand_pose_localization.model import Model +from hand_pose_localization.processor import base64_to_cv2, Processor + + +@moduleinfo( + name="hand_pose_localization", # 模型名称 + type="CV/keypoint_detection", # 模型类型 + author="jm12138", # 作者名称 + author_email="jm12138@qq.com", # 作者邮箱 + summary="hand_pose_localization", # 模型介绍 + version="1.0.0" # 版本号 +) +class Hand_Pose_Localization(Module): + # 初始化函数 + def _initialize(self, use_gpu=False): + # 设置模型路径 + self.model_path = os.path.join(self.directory, "hand_pose_localization") + + # 加载模型 + self.model = Model(self.model_path, use_gpu) + + # 关键点检测函数 + def keypoint_detection(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False): + # 加载数据处理器 + processor = Processor(images, paths, batch_size, output_dir) + + # 模型预测 + outputs = self.model.predict(processor.input_datas) + + # 结果后处理 + results = processor.postprocess(outputs, visualization) + + # 返回结果 + return results + + # Hub Serving + @serving + def serving_method(self, images, **kwargs): + # 获取输入数据 + images_decode = [base64_to_cv2(image) for image in images] + # 关键点检测 + results = self.keypoint_detection(images_decode, **kwargs) + # 返回结果 + return results diff --git a/modules/image/keypoint_detection/hand_pose_localization/processor.py b/modules/image/keypoint_detection/hand_pose_localization/processor.py new file mode 100644 index 00000000..99aed8ea --- /dev/null +++ b/modules/image/keypoint_detection/hand_pose_localization/processor.py @@ -0,0 +1,129 @@ +import os +import cv2 +import time +import base64 +import numpy as np + +__all__ = ['base64_to_cv2', 'Processor'] + + +def check_dir(dir_path): + # 目录检查函数 + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def base64_to_cv2(b64str): + # base64转cv2函数 + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Processor(): + # 初始化函数 + def __init__(self, images=None, paths=None, batch_size=1, output_dir='output'): + # 变量设置 + self.num_points = 21 + self.inHeight = 368 + self.threshold = 0.1 + self.point_pairs = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], 
[6, 7], [7, 8], [0, 9], [9, 10], [10, 11], + [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]] + + self.images = images + self.paths = paths + self.batch_size = batch_size + self.output_dir = output_dir + + # 获取原始输入数据 + self.datas = self.load_datas() + + # 对原始输入数据进行预处理 + self.input_datas = self.preprocess() + + # 读取数据函数 + def load_datas(self): + datas = [] + + # 读取数据列表 + if self.paths is not None: + for im_path in self.paths: + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + datas.append(im) + + if self.images is not None: + datas = self.images + + # 返回数据列表 + return datas + + # 数据预处理函数 + def preprocess(self): + input_datas = [] + + # 数据预处理 + for i, img in enumerate(self.datas): + img_height, img_width, _ = img.shape + aspect_ratio = img_width / img_height + inWidth = int(((aspect_ratio * self.inHeight) * 8) // 8) + inpBlob = cv2.dnn.blobFromImage( + img, 1.0 / 255, (inWidth, self.inHeight), (0, 0, 0), swapRB=False, crop=False) + input_datas.append(inpBlob) + + # 数据按batch_size切分 + input_datas = np.concatenate(input_datas, 0) + split_num = len(self.datas) // self.batch_size + 1 if len(self.datas) % self.batch_size != 0 else len( + self.datas) // self.batch_size + input_datas = np.array_split(input_datas, split_num) + + # 返回预处理完成的数据 + return input_datas + + # 结果后处理函数 + def postprocess(self, outputs, visualization): + all_points = [] + + # 结果后处理 + for im_id, img in enumerate(self.datas): + points = [] + for idx in range(self.num_points): + probMap = outputs[im_id, idx, :, :] + img_height, img_width, _ = img.shape + probMap = cv2.resize(probMap, (img_width, img_height)) + minVal, prob, minLoc, point = cv2.minMaxLoc(probMap) + + if prob > self.threshold: + points.append([int(point[0]), int(point[1])]) + else: + points.append(None) + + all_points.append(points) + + # 结果可视化 + if visualization: + # 检查输出目录 + check_dir(self.output_dir) + # 结果可视化 + self.vis_pose(img, points, im_id) + + # 返回后处理结果 + return all_points + + # 结果可视化 + def vis_pose(self, img, points, im_id): + # 根据结果绘制关键点到原图像上 + for pair in self.point_pairs: + partA = pair[0] + partB = pair[1] + + if points[partA] and points[partB]: + cv2.line(img, tuple(points[partA]), tuple(points[partB]), (0, 255, 255), 3) + cv2.circle(img, tuple(points[partA]), 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED) + + # 可视化图像保存 + cv2.imwrite(os.path.join(self.output_dir, '%d_%d.jpg' % (im_id, time.time())), img) diff --git a/modules/image/semantic_segmentation/humanseg_lite/README.md b/modules/image/semantic_segmentation/humanseg_lite/README.md new file mode 100644 index 00000000..1ddb1e18 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_lite/README.md @@ -0,0 +1,205 @@ +## 模型概述 + +HumanSeg_lite是基于ShuffleNetV2网络结构的基础上进行优化的人像分割模型,进一步减小了网络规模,网络大小只有541K,量化后只有187K,适用于手机自拍人像分割等实时分割场景。 + + +## 命令行预测 + +``` +hub run humanseg_lite --input_path "/PATH/TO/IMAGE" + +``` + + +## API + +```python +def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_lite_output') +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 
'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + +```python +def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): +``` + +预测API,用于逐帧对视频人像分割。 + +**参数** + +* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* frame_id (int): 当前帧的编号; +* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; +* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + + +**返回** + +* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 +* cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; +* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 + + +```python +def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_lite_video_result'): +``` + +预测API,用于视频人像分割。 + +**参数** + +* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + + +```python +def save_inference_model(dirname='humanseg_lite_model', + model_filename=None, + params_filename=None, + combined=True) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +图片分割及视频分割代码示例: +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module(name='humanseg_lite') +im = cv2.imread('/PATH/TO/IMAGE') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = human_seg.segment(images=[im],visualization=True) +print(res[0]['data']) +human_seg.video_segment('/PATH/TO/VIDEO') +human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + +``` +视频流预测代码示例: +```python +import cv2 +import numpy as np +import paddlehub as hub + +human_seg = hub.Module('humanseg_lite') +cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') +fps = cap_video.get(cv2.CAP_PROP_FPS) +save_path = 'humanseg_lite_video.avi' +width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) +height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) +cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) +prev_gray = None +prev_cfd = None +while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + +cap_video.release() +cap_out.release() + +``` +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m humanseg_lite +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, 
cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_lite" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存图片 +mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) +rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) +cv2.imwrite("segment_human_lite.png", rgba) +``` +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/HumanSeg + + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/modules/image/semantic_segmentation/humanseg_lite/__init__.py b/modules/image/semantic_segmentation/humanseg_lite/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/image/semantic_segmentation/humanseg_lite/data_feed.py b/modules/image/semantic_segmentation/humanseg_lite/data_feed.py new file mode 100644 index 00000000..7f903397 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_lite/data_feed.py @@ -0,0 +1,63 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader', 'preprocess_v'] + + +def preprocess_v(img, w, h): + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + #print(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (192, 192)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/modules/image/semantic_segmentation/humanseg_lite/module.py b/modules/image/semantic_segmentation/humanseg_lite/module.py new file mode 100644 index 00000000..b8ba8685 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_lite/module.py @@ -0,0 +1,396 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import os +import os.path as osp +import argparse + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_lite.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_lite.data_feed import reader, preprocess_v +from humanseg_lite.optimal import postprocess_v, threshold_mask + + +@moduleinfo( + name="humanseg_lite", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="humanseg_lite is a semantic segmentation model.", + version="1.1.0") +class ShufflenetHumanSeg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_lite_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_lite_output'): + """ + API for human segmentation. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = output[1].as_ndarray() + output = np.expand_dims(output[:, 1, :, :], axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu=False): + """ + API for human video segmentation. + + Args: + frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR. + frame_id (int): index of the frame to be decoded. + prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W] + prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W] + use_gpu (bool): Whether to use gpu. + + Returns: + img_matting (numpy.ndarray): data of segmentation mask. + cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W] + optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W] + + """ + resize_h = 192 + resize_w = 192 + is_init = True + width = int(frame_org.shape[0]) + height = int(frame_org.shape[1]) + disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + if frame_id == 1: + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + else: + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (height, width), cv2.INTER_LINEAR) + + return [img_matting, cur_gray, optflow_map] + + def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_lite_video_result'): + """ + API for human video segmentation. + + Args: + video_path (str): The path to take the video under preprocess. If video_path is None, it will capture + the vedio from your camera. + use_gpu (bool): Whether to use gpu. + save_dir (str): The path to store output video. 
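+            Note: if video_path is None, the segmented camera stream is shown in
+            an OpenCV window and pressing 'q' stops it; otherwise the composited
+            frames are written to save_dir as 'result.avi'.
+            Minimal usage sketch (the file name 'test.mp4' is only a placeholder):
+                seg = ShufflenetHumanSeg()
+                seg.video_segment(video_path='test.mp4', use_gpu=False)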
+ + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError("Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. " + "If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id.") + + resize_h = 192 + resize_w = 192 + if not video_path: + cap_video = cv2.VideoCapture(0) + else: + cap_video = cv2.VideoCapture(video_path) + + if not cap_video.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format(video_path)) + + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + is_init = True + fps = cap_video.get(cv2.CAP_PROP_FPS) + + if video_path is not None: + print('Please wait. It is computing......') + if not osp.exists(save_dir): + os.makedirs(save_dir) + save_path = osp.join(save_dir, 'result' + '.avi') + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), cv2.INTER_LINEAR) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() + else: + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), cv2.INTER_LINEAR) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * 
bg_im).astype(np.uint8) + cv2.imshow('HumanSegmentation', comb) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + cap_video.release() + + def save_inference_model(self, + dirname='humanseg_lite_model', + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='humanseg_lite_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', type=str, default='humanseg_lite_model', help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. 
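+        Only --input_path is exposed here; it should point to the image to be
+        segmented, matching the command-line form
+        `hub run humanseg_lite --input_path "/PATH/TO/IMAGE"` shown in the README.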
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = ShufflenetHumanSeg() + #shuffle.video_segment() + img = cv2.imread('photo.jpg') + # res = m.segment(images=[img], visualization=True) + # print(res[0]['data']) + # m.video_segment('') + cap_video = cv2.VideoCapture('video_test.mp4') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'result_frame.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = m.video_stream_segment( + frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + + cap_video.release() + cap_out.release() diff --git a/modules/image/semantic_segmentation/humanseg_lite/optimal.py b/modules/image/semantic_segmentation/humanseg_lite/optimal.py new file mode 100644 index 00000000..da7aa514 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_lite/optimal.py @@ -0,0 +1,103 @@ +# -*- coding:utf-8 -* +import numpy as np + + +def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + h, w = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + flow_fw = disflow.calc(pre_gray, cur_gray, None) + flow_bw = disflow.calc(cur_gray, pre_gray, None) + flow_fw = np.round(flow_fw).astype(np.int) + flow_bw = np.round(flow_bw).astype(np.int) + y_list = np.array(range(h)) + x_list = np.array(range(w)) + yv, xv = np.meshgrid(y_list, x_list) + yv, xv = yv.T, xv.T + cur_x = xv + flow_fw[:, :, 0] + cur_y = yv + flow_fw[:, :, 1] + + # 超出边界不跟踪 + not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) + flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] + not_track += ( + np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])) >= check_thres + track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] + + is_track[cur_y[~not_track], cur_x[~not_track]] = 1 + + not_flow = np.all(np.abs(flow_fw) == 0, axis=-1) * np.all(np.abs(flow_bw) == 0, axis=-1) + dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 + return track_cfd, is_track, dl_weights + + +def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回 + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + fusion_cfd = dl_cfd.copy() + is_track = is_track.astype(np.bool) + fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + (1 - dl_weights[is_track]) * track_cfd[is_track] + # 确定区域 + index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track + index_less01 = (dl_weights < 0.1) * index_certain + fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[index_less01] + index_larger09 = (dl_weights >= 0.1) * index_certain + 
fusion_cfd[index_larger09] = 0.4 * dl_cfd[index_larger09] + 0.6 * track_cfd[index_larger09] + return fusion_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def postprocess_v(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + pre_gray : 前一帧灰度图 + pre_cfd :前一帧融合结果 + scoremap : 当前帧分割结果 + difflow : 光流 + is_init : 是否第一帧 + Returns: + fusion_cfd : 光流追踪图和预测结果融合图 + """ + h, w = scoremap.shape + cur_cfd = scoremap.copy() + + if is_init: + if h <= 64 or w <= 64: + disflow.setFinestScale(1) + elif h <= 160 or w <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((h, w), np.float32) * 0.3 + track_cfd, is_track, weights = human_seg_tracking(prev_gray, cur_gray, pre_cfd, weights, disflow) + fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) + + return fusion_cfd diff --git a/modules/image/semantic_segmentation/humanseg_lite/processor.py b/modules/image/semantic_segmentation/humanseg_lite/processor.py new file mode 100644 index 00000000..e4911ff4 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_lite/processor.py @@ -0,0 +1,78 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = (logit * 255).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = logit + else: + result['data'] = logit + print("result['data'] shape", result['data'].shape) + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
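+    The saved name keeps the prefix of the source image with a '.png' extension;
+    if that file already exists in output_dir, a 'time=<timestamp>' suffix is
+    appended so earlier results are not overwritten.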
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/semantic_segmentation/humanseg_mobile/README.md b/modules/image/semantic_segmentation/humanseg_mobile/README.md new file mode 100644 index 00000000..ae767d34 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_mobile/README.md @@ -0,0 +1,208 @@ +## 模型概述 + +HumanSeg-mobile是基于HRNet(Deep High-Resolution Representation Learning for Visual Recognition)的人像分割网络。HRNet在特征提取过程中保持了高分辨率的信息,保持了物体的细节信息,并可通过控制每个分支的通道数调整模型的大小。HumanSeg-mobile采用了HRNet_w18_small_v1的网络结构,模型大小只有5.8M, 适用于移动端或服务端CPU的前置摄像头场景。 + +## 命令行预测 + +``` +hub run humanseg_mobile --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_mobile_output') +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + + +```python +def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): +``` + +预测API,用于逐帧对视频人像分割。 + +**参数** + +* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* frame_id (int): 当前帧的编号; +* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; +* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + + +**返回** + +* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 +* cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; +* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 + + +```python +def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_mobile_video_result'): +``` + +预测API,用于视频人像分割。 + +**参数** + +* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + + +```python +def save_inference_model(dirname='humanseg_mobile_model', + model_filename=None, + params_filename=None, + combined=True) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +图片分割及视频分割代码示例: + +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module(name='humanseg_mobile') +im = cv2.imread('/PATH/TO/IMAGE') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = human_seg.segment(images=[im],visualization=True) +print(res[0]['data']) +human_seg.video_segment('/PATH/TO/VIDEO') +human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + +``` +视频流预测代码示例: + +```python +import cv2 +import numpy as np +import paddlehub as hub + +human_seg = 
hub.Module('humanseg_mobile') +cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') +fps = cap_video.get(cv2.CAP_PROP_FPS) +save_path = 'humanseg_mobile_video.avi' +width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) +height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) +cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) +prev_gray = None +prev_cfd = None +while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + +cap_video.release() +cap_out.release() + +``` + +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m humanseg_mobile +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_mobile" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存图片 +mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) +rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) +cv2.imwrite("segment_human_mobile.png", rgba) +``` + +### 查看代码 + + + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/modules/image/semantic_segmentation/humanseg_mobile/__init__.py b/modules/image/semantic_segmentation/humanseg_mobile/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/image/semantic_segmentation/humanseg_mobile/data_feed.py b/modules/image/semantic_segmentation/humanseg_mobile/data_feed.py new file mode 100644 index 00000000..f7fbb0e2 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_mobile/data_feed.py @@ -0,0 +1,62 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader', 'preprocess_v'] + + +def preprocess_v(img, w, h): + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. 
+ """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + #print(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (192, 192)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/modules/image/semantic_segmentation/humanseg_mobile/module.py b/modules/image/semantic_segmentation/humanseg_mobile/module.py new file mode 100644 index 00000000..f7ac6796 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_mobile/module.py @@ -0,0 +1,380 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import ast +import os +import os.path as osp +import argparse + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_mobile.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_mobile.data_feed import reader, preprocess_v +from humanseg_mobile.optimal import postprocess_v, threshold_mask + + +@moduleinfo( + name="humanseg_mobile", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="HRNet_w18_samll_v1 is a semantic segmentation model.", + version="1.1.0") +class HRNetw18samllv1humanseg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_mobile_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_mobile_output'): + """ + API for human segmentation. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError("Environment Variable CUDA_VISIBLE_DEVICES is not set correctly." 
+ "If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id.") + + # compatibility with older versions + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = output[1].as_ndarray() + output = np.expand_dims(output[:, 1, :, :], axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu=False): + """ + API for human video segmentation. + + Args: + frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR. + frame_id (int): index of the frame to be decoded. + prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W] + prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W] + use_gpu (bool): Whether to use gpu. + + Returns: + img_matting (numpy.ndarray): data of segmentation mask. + cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W] + optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W] + + """ + resize_h = 192 + resize_w = 192 + is_init = True + width = int(frame_org.shape[0]) + height = int(frame_org.shape[1]) + disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + if frame_id == 1: + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + else: + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (height, width), cv2.INTER_LINEAR) + return [img_matting, cur_gray, optflow_map] + + def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_mobile_video_result'): + """ + API for human video segmentation. + + Args: + video_path (str): The path to take the video under preprocess. If video_path is None, it will capture + the vedio from your camera. + use_gpu (bool): Whether to use gpu. 
+ save_dir (str): The path to store output video. + + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError("Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. " + "If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id.") + + resize_h = 192 + resize_w = 192 + if not video_path: + cap_video = cv2.VideoCapture(0) + else: + cap_video = cv2.VideoCapture(video_path) + if not cap_video.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format(video_path)) + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + is_init = True + fps = cap_video.get(cv2.CAP_PROP_FPS) + if video_path is not None: + print('Please wait. It is computing......') + if not osp.exists(save_dir): + os.makedirs(save_dir) + save_path = osp.join(save_dir, 'result' + '.avi') + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), cv2.INTER_LINEAR) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() + else: + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), cv2.INTER_LINEAR) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * 
frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cv2.imshow('HumanSegmentation', comb) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + cap_video.release() + + def save_inference_model(self, + dirname='humanseg_mobile_model', + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='humanseg_mobile_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', type=str, default='humanseg_mobile_model', help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = HRNetw18samllv1humanseg() + img = cv2.imread('photo.jpg') + #res = m.segment(images=[img], visualization=True) + #print(res[0]['data']) + #m.video_segment('') + cap_video = cv2.VideoCapture('video_test.mp4') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'result_frame.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = m.video_stream_segment( + frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() diff --git a/modules/image/semantic_segmentation/humanseg_mobile/optimal.py b/modules/image/semantic_segmentation/humanseg_mobile/optimal.py new file mode 100644 index 00000000..fbdc750f --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_mobile/optimal.py @@ -0,0 +1,104 @@ +# -*- coding:utf-8 -*- + +import numpy as np + + +def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + h, w = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + flow_fw = disflow.calc(pre_gray, cur_gray, None) + flow_bw = disflow.calc(cur_gray, pre_gray, None) + flow_fw = np.round(flow_fw).astype(np.int) + flow_bw = np.round(flow_bw).astype(np.int) + y_list = np.array(range(h)) + x_list = np.array(range(w)) + yv, xv = np.meshgrid(y_list, x_list) + yv, xv = yv.T, xv.T + cur_x = xv + flow_fw[:, :, 0] + cur_y = yv + flow_fw[:, :, 1] + + # 超出边界不跟踪 + not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) + flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] + not_track += ( + np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])) >= check_thres + track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] + + is_track[cur_y[~not_track], cur_x[~not_track]] = 1 + + not_flow = np.all(np.abs(flow_fw) == 0, axis=-1) * np.all(np.abs(flow_bw) == 0, axis=-1) + dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 + return track_cfd, is_track, dl_weights + + +def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回 + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + fusion_cfd = dl_cfd.copy() + is_track = is_track.astype(np.bool) + fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + (1 - dl_weights[is_track]) * track_cfd[is_track] + # 确定区域 + index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track + index_less01 = (dl_weights < 0.1) * index_certain + fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[index_less01] + index_larger09 = (dl_weights >= 0.1) * index_certain + 
fusion_cfd[index_larger09] = 0.4 * dl_cfd[index_larger09] + 0.6 * track_cfd[index_larger09] + return fusion_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def postprocess_v(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + pre_gray : 前一帧灰度图 + pre_cfd :前一帧融合结果 + scoremap : 当前帧分割结果 + difflow : 光流 + is_init : 是否第一帧 + Returns: + fusion_cfd : 光流追踪图和预测结果融合图 + """ + h, w = scoremap.shape + cur_cfd = scoremap.copy() + + if is_init: + if h <= 64 or w <= 64: + disflow.setFinestScale(1) + elif h <= 160 or w <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((h, w), np.float32) * 0.3 + track_cfd, is_track, weights = human_seg_tracking(prev_gray, cur_gray, pre_cfd, weights, disflow) + fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) + + return fusion_cfd diff --git a/modules/image/semantic_segmentation/humanseg_mobile/processor.py b/modules/image/semantic_segmentation/humanseg_mobile/processor.py new file mode 100644 index 00000000..6a81997c --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_mobile/processor.py @@ -0,0 +1,78 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization, thresh=120): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + thresh (float): threshold. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = (logit * 255).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = logit + else: + result['data'] = logit + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/semantic_segmentation/humanseg_server/README.md b/modules/image/semantic_segmentation/humanseg_server/README.md new file mode 100644 index 00000000..bf1b0a4c --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_server/README.md @@ -0,0 +1,210 @@ +## 模型概述 + +高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xcetion65, 模型大小为158M,网络结构如图: +
+(此处为 Deeplabv3+/Xception65 网络结构示意图)
+
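+A minimal sketch of how the returned alpha matte can be used (it relies on the `segment` API documented below; `/PATH/TO/IMAGE` and the output filename are placeholders):
+
+```python
+import cv2
+import numpy as np
+import paddlehub as hub
+
+human_seg = hub.Module(name='humanseg_server')
+im = cv2.imread('/PATH/TO/IMAGE')                    # BGR image
+mask = human_seg.segment(images=[im])[0]['data']     # alpha matte, values in [0, 255]
+alpha = (mask.astype(np.float32) / 255.0)[:, :, np.newaxis]
+white = np.full_like(im, 255, dtype=np.float32)
+# composite the person onto a white background using the matte as per-pixel weight
+comb = (alpha * im + (1 - alpha) * white).astype(np.uint8)
+cv2.imwrite('human_on_white.png', comb)
+```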
+ +## 命令行预测 + +``` +hub run humanseg_server --input_path "/PATH/TO/IMAGE" +``` + + + +## API + +```python +def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_server_output'): +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + +```python +def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): +``` + +预测API,用于逐帧对视频人像分割。 + +**参数** + +* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* frame_id (int): 当前帧的编号; +* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; +* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + + +**返回** + +* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明); +* cur_gray (numpy.ndarray): 当前帧输入分割网络图像的灰度图; +* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图。 + + +```python +def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_server_video'): +``` + +预测API,用于视频人像分割。 + +**参数** + +* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果; +* use\_gpu (bool): 是否使用GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + + +```python +def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True): +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +图片分割及视频分割代码示例: +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module(name='humanseg_server') +im = cv2.imread('/PATH/TO/IMAGE') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = human_seg.segment(images=[im],visualization=True) +print(res[0]['data']) +human_seg.video_segment('/PATH/TO/VIDEO') +human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + +``` +视频流预测代码示例: +```python +import cv2 +import numpy as np +import paddlehub as hub + +human_seg = hub.Module('humanseg_server') +cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') +fps = cap_video.get(cv2.CAP_PROP_FPS) +save_path = 'humanseg_server_video.avi' +width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) +height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) +cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) +prev_gray = None +prev_cfd = None +while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + +cap_video.release() +cap_out.release() + +``` + +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + 
+运行启动命令: +```shell +$ hub serving start -m humanseg_server +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_server" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存图片 +mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) +rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) +cv2.imwrite("segment_human_server.png", rgba) +``` + + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/HumanSeg + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/modules/image/semantic_segmentation/humanseg_server/__init__.py b/modules/image/semantic_segmentation/humanseg_server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/image/semantic_segmentation/humanseg_server/data_feed.py b/modules/image/semantic_segmentation/humanseg_server/data_feed.py new file mode 100644 index 00000000..85639d02 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_server/data_feed.py @@ -0,0 +1,62 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader', 'preprocess_v'] + + +def preprocess_v(img, w, h): + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (513, 513)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/modules/image/semantic_segmentation/humanseg_server/module.py b/modules/image/semantic_segmentation/humanseg_server/module.py new file mode 100644 index 00000000..f266f59e --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_server/module.py @@ -0,0 +1,368 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import os +import os.path as osp +import argparse + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_server.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_server.data_feed import reader, preprocess_v +from humanseg_server.optimal import postprocess_v, threshold_mask + + +@moduleinfo( + name="humanseg_server", + type="CV/semantic_segmentation", + author="baidu-vis", + author_email="", + summary="DeepLabv3+ is a semantic segmentation model.", + version="1.1.0") +class DeeplabV3pXception65HumanSeg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_server_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_server_output'): + """ + API for human segmentation. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. 
+ batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + # compatibility with older versions + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = output[1].as_ndarray() + output = np.expand_dims(output[:, 1, :, :], axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu=False): + """ + API for human video segmentation. + + Args: + frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR. + frame_id (int): index of the frame to be decoded. + prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W] + prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W] + use_gpu (bool): Whether to use gpu. + + Returns: + img_matting (numpy.ndarray): data of segmentation mask. 
+ cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W] + optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W] + + """ + resize_h = 512 + resize_w = 512 + is_init = True + width = int(frame_org.shape[0]) + height = int(frame_org.shape[1]) + disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + if frame_id == 1: + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + else: + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (height, width), cv2.INTER_LINEAR) + return [img_matting, cur_gray, optflow_map] + + def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_server_video'): + resize_h = 512 + resize_w = 512 + if not video_path: + cap_video = cv2.VideoCapture(0) + else: + cap_video = cv2.VideoCapture(video_path) + if not cap_video.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format(video_path)) + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + is_init = True + fps = cap_video.get(cv2.CAP_PROP_FPS) + if video_path is not None: + print('Please wait. 
It is computing......') + if not osp.exists(save_dir): + os.makedirs(save_dir) + save_path = osp.join(save_dir, 'result' + '.avi') + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), cv2.INTER_LINEAR) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() + else: + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), cv2.INTER_LINEAR) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cv2.imshow('HumanSegmentation', comb) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + cap_video.release() + + def save_inference_model(self, + dirname='humanseg_server_model', + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a 
service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='humanseg_server_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', type=str, default='humanseg_server_model', help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = DeeplabV3pXception65HumanSeg() + # img = cv2.imread('photo.jpg') + # res = m.segment(images=[img]) + # print(res[0]['data']) + # m.save_inference_model() + #m.video_segment(video_path='video_test.mp4') + img = cv2.imread('photo.jpg') + # res = m.segment(images=[img], visualization=True) + # print(res[0]['data']) + # m.video_segment('') + cap_video = cv2.VideoCapture('video_test.mp4') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'result_frame.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = m.video_stream_segment( + frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() diff --git a/modules/image/semantic_segmentation/humanseg_server/optimal.py b/modules/image/semantic_segmentation/humanseg_server/optimal.py new file mode 100644 index 00000000..df76c330 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_server/optimal.py @@ -0,0 +1,103 @@ +# -*- coding:utf-8 -*- +import numpy as np + + +def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + h, w = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + flow_fw = disflow.calc(pre_gray, cur_gray, None) + flow_bw = disflow.calc(cur_gray, pre_gray, None) + flow_fw = np.round(flow_fw).astype(np.int) + flow_bw = np.round(flow_bw).astype(np.int) + y_list = np.array(range(h)) + x_list = np.array(range(w)) + yv, xv = np.meshgrid(y_list, x_list) + yv, xv = yv.T, xv.T + cur_x = xv + flow_fw[:, :, 0] + cur_y = yv + flow_fw[:, :, 1] + + # 超出边界不跟踪 + not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) + flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] + not_track += ( + np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])) >= check_thres + track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] + + is_track[cur_y[~not_track], cur_x[~not_track]] = 1 + + not_flow = np.all(np.abs(flow_fw) == 0, axis=-1) * np.all(np.abs(flow_bw) == 0, axis=-1) + dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 + return track_cfd, is_track, dl_weights + + +def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回 + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + fusion_cfd = dl_cfd.copy() + is_track = is_track.astype(np.bool) + fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + (1 - dl_weights[is_track]) * track_cfd[is_track] + # 确定区域 + index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track + index_less01 = (dl_weights < 0.1) * 
index_certain + fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[index_less01] + index_larger09 = (dl_weights >= 0.1) * index_certain + fusion_cfd[index_larger09] = 0.4 * dl_cfd[index_larger09] + 0.6 * track_cfd[index_larger09] + return fusion_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def postprocess_v(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + pre_gray : 前一帧灰度图 + pre_cfd :前一帧融合结果 + scoremap : 当前帧分割结果 + difflow : 光流 + is_init : 是否第一帧 + Returns: + fusion_cfd : 光流追踪图和预测结果融合图 + """ + h, w = scoremap.shape + cur_cfd = scoremap.copy() + + if is_init: + if h <= 64 or w <= 64: + disflow.setFinestScale(1) + elif h <= 160 or w <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((h, w), np.float32) * 0.3 + track_cfd, is_track, weights = human_seg_tracking(prev_gray, cur_gray, pre_cfd, weights, disflow) + fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) + + return fusion_cfd diff --git a/modules/image/semantic_segmentation/humanseg_server/processor.py b/modules/image/semantic_segmentation/humanseg_server/processor.py new file mode 100644 index 00000000..8fd8909a --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_server/processor.py @@ -0,0 +1,76 @@ +# -*- coding:utf-8 -*- +import os +import time + +import base64 +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = (logit * 255).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = rgba[:, :, 3] + else: + result['data'] = rgba[:, :, 3] + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md index 08533849..74efcf2a 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md @@ -1,6 +1,6 @@ ## 概述 -chinese_ocr_db_crnn_mobile Module用于识别图片当中的汉字。其基于[chinese_text_detection_db_mobile Module](https://www.paddlepaddle.org.cn/hubdetail?name=chinese_text_detection_db_mobile&en_category=TextRecognition)检测得到的文本框,继续识别文本框中的中文文字。识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个超轻量级中文OCR模型,支持直接预测。 +chinese_ocr_db_crnn_mobile Module用于识别图片当中的汉字。其基于[chinese_text_detection_db_mobile Module](https://www.paddlepaddle.org.cn/hubdetail?name=chinese_text_detection_db_mobile&en_category=TextRecognition)检测得到的文本框,继续识别文本框中的中文文字。之后对检测文本框进行角度分类。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个超轻量级中文OCR模型,支持直接预测。

@@ -19,6 +19,16 @@ $ hub run chinese_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" ## API +### \_\_init\_\_(text_detector_module=None, enable_mkldnn=False) + +构造ChineseOCRDBCRNN对象 + +**参数** + +* text_detector_module(str): 文字检测PaddleHub Module名字,如设置为None,则默认使用[chinese_text_detection_db_mobile Module](https://www.paddlepaddle.org.cn/hubdetail?name=chinese_text_detection_db_mobile&en_category=TextRecognition)。其作用为检测图片当中的文本。 +* enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + + ```python def recognize_text(images=[], paths=[], @@ -26,7 +36,8 @@ def recognize_text(images=[], output_dir='ocr_result', visualization=False, box_thresh=0.5, - text_thresh=0.5) + text_thresh=0.5, + angle_classification_thresh=0.9) ``` 预测API,检测输入图片中的所有中文文本的位置。 @@ -38,6 +49,7 @@ def recognize_text(images=[], * use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** * box\_thresh (float): 检测文本框置信度的阈值; * text\_thresh (float): 识别中文文本置信度的阈值; +* angle_classification_thresh(float): 文本角度分类置信度的阈值 * visualization (bool): 是否将识别结果保存为图片文件; * output\_dir (str): 图片的保存路径,默认设为 ocr\_result; @@ -132,3 +144,11 @@ pyclipper * 1.0.1 修复使用在线服务调用模型失败问题 + +* 1.0.2 + + 支持mkldnn加速CPU计算 + +* 1.1.0 + + 使用超轻量级的三阶段模型(文本框检测-角度分类-文字识别)识别图片文字。 diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py index bf6d21f6..ad6b01ba 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py @@ -22,17 +22,23 @@ class CharacterOps(object): def __init__(self, config): self.character_type = config['character_type'] self.loss_type = config['loss_type'] + self.max_text_len = config['max_text_length'] if self.character_type == "en": self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) elif self.character_type == "ch": character_dict_path = config['character_dict_path'] + add_space = False + if 'use_space_char' in config: + add_space = config['use_space_char'] self.character_str = "" with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: - line = line.decode('utf-8').strip("\n") + line = line.decode('utf-8').strip("\n").strip("\r\n") self.character_str += line + if add_space: + self.character_str += " " dict_character = list(self.character_str) elif self.character_type == "en_sensitive": # same with ASTER setting (use 94 char). 
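The space-character and max-length handling added above is driven by a small config dict; a sketch of the configuration this module passes to CharacterOps (assumes the chinese_ocr_db_crnn_mobile package directory is on sys.path; the dictionary path is a placeholder for the bundled ppocr_keys_v1.txt):

```python
from chinese_ocr_db_crnn_mobile.character import CharacterOps

# Configuration mirrored from module.py below; the dict path is a placeholder
# for the assets/ppocr_keys_v1.txt file shipped with the module.
char_ops = CharacterOps({
    'character_type': 'ch',
    'character_dict_path': '/PATH/TO/ppocr_keys_v1.txt',
    'loss_type': 'ctc',
    'max_text_length': 25,
    'use_space_char': True,   # appends a space to the character set
})
```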
@@ -46,6 +52,8 @@ class CharacterOps(object): self.end_str = "eos" if self.loss_type == "attention": dict_character = [self.beg_str, self.end_str] + dict_character + elif self.loss_type == "srn": + dict_character = dict_character + [self.beg_str, self.end_str] self.dict = {} for i, char in enumerate(dict_character): self.dict[char] = i @@ -90,7 +98,7 @@ class CharacterOps(object): if is_remove_duplicate: if idx > 0 and text_index[idx - 1] == text_index[idx]: continue - char_list.append(self.character[text_index[idx]]) + char_list.append(self.character[int(text_index[idx])]) text = ''.join(char_list) return text @@ -134,6 +142,36 @@ def cal_predicts_accuracy(char_ops, preds, preds_lod, labels, labels_lod, is_rem return acc, acc_num, img_num +def cal_predicts_accuracy_srn(char_ops, preds, labels, max_text_len, is_debug=False): + acc_num = 0 + img_num = 0 + + char_num = char_ops.get_char_num() + + total_len = preds.shape[0] + img_num = int(total_len / max_text_len) + for i in range(img_num): + cur_label = [] + cur_pred = [] + for j in range(max_text_len): + if labels[j + i * max_text_len] != int(char_num - 1): #0 + cur_label.append(labels[j + i * max_text_len][0]) + else: + break + + for j in range(max_text_len + 1): + if j < len(cur_label) and preds[j + i * max_text_len][0] != cur_label[j]: + break + elif j == len(cur_label) and j == max_text_len: + acc_num += 1 + break + elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(char_num - 1): + acc_num += 1 + break + acc = acc_num * 1.0 / img_num + return acc, acc_num, img_num + + def convert_rec_attention_infer_res(preds): img_num = preds.shape[0] target_lod = [0] diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py index 13c92f49..a0704f86 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py @@ -1,8 +1,4 @@ # -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import argparse import ast import copy @@ -25,32 +21,44 @@ from chinese_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_ @moduleinfo( name="chinese_ocr_db_crnn_mobile", - version="1.0.3", - summary= - "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", + version="1.1.0", + summary="The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions \ + based on the differentiable_binarization_chn module. Then it classifies the text angle and recognizes the chinese texts. 
", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseOCRDBCRNN(hub.Module): - def _initialize(self, text_detector_module=None): + def _initialize(self, text_detector_module=None, enable_mkldnn=False): """ initialize with the necessary elements """ self.character_dict_path = os.path.join(self.directory, 'assets', 'ppocr_keys_v1.txt') - char_ops_params = {'character_type': 'ch', 'character_dict_path': self.character_dict_path, 'loss_type': 'ctc'} + char_ops_params = { + 'character_type': 'ch', + 'character_dict_path': self.character_dict_path, + 'loss_type': 'ctc', + 'max_text_length': 25, + 'use_space_char': True + } self.char_ops = CharacterOps(char_ops_params) self.rec_image_shape = [3, 32, 320] self._text_detector_module = text_detector_module self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') - self.pretrained_model_path = os.path.join(self.directory, 'inference_model') - self._set_config() + self.enable_mkldnn = enable_mkldnn + + self.rec_pretrained_model_path = os.path.join(self.directory, 'inference_model', 'character_rec') + self.cls_pretrained_model_path = os.path.join(self.directory, 'inference_model', 'angle_cls') + self.rec_predictor, self.rec_input_tensor, self.rec_output_tensors = self._set_config( + self.rec_pretrained_model_path) + self.cls_predictor, self.cls_input_tensor, self.cls_output_tensors = self._set_config( + self.cls_pretrained_model_path) - def _set_config(self): + def _set_config(self, pretrained_model_path): """ - predictor config setting + predictor config path """ - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') + model_file_path = os.path.join(pretrained_model_path, 'model') + params_file_path = os.path.join(pretrained_model_path, 'params') config = AnalysisConfig(model_file_path, params_file_path) try: @@ -64,20 +72,26 @@ class ChineseOCRDBCRNN(hub.Module): config.enable_use_gpu(8000, 0) else: config.disable_gpu() + if self.enable_mkldnn: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() config.disable_glog_info() - - # use zero copy config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) - input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) - output_names = self.predictor.get_output_names() - self.output_tensors = [] + + predictor = create_paddle_predictor(config) + + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_tensor(input_names[0]) + output_names = predictor.get_output_names() + output_tensors = [] for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) - self.output_tensors.append(output_tensor) + output_tensor = predictor.get_output_tensor(output_name) + output_tensors.append(output_tensor) + + return predictor, input_tensor, output_tensors @property def text_detector_module(self): @@ -85,7 +99,8 @@ class ChineseOCRDBCRNN(hub.Module): text detect module """ if not self._text_detector_module: - self._text_detector_module = hub.Module(name='chinese_text_detection_db_mobile') + self._text_detector_module = hub.Module( + name='chinese_text_detection_db_mobile', enable_mkldnn=self.enable_mkldnn, version='1.0.3') return self._text_detector_module def read_images(self, paths=[]): @@ -100,6 
+115,7 @@ class ChineseOCRDBCRNN(hub.Module): return images def get_rotate_crop_image(self, img, points): + ''' img_height, img_width = img.shape[0:2] left = int(np.min(points[:, 0])) right = int(np.max(points[:, 0])) @@ -108,20 +124,40 @@ class ChineseOCRDBCRNN(hub.Module): img_crop = img[top:bottom, left:right, :].copy() points[:, 0] = points[:, 0] - left points[:, 1] = points[:, 1] - top - img_crop_width = int(np.linalg.norm(points[0] - points[1])) - img_crop_height = int(np.linalg.norm(points[0] - points[3])) - pts_std = np.float32([[0, 0], [img_crop_width, 0],\ - [img_crop_width, img_crop_height], [0, img_crop_height]]) + ''' + img_crop_width = int(max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3]))) + img_crop_height = int(max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]))) + pts_std = np.float32([[0, 0], [img_crop_width, 0], [img_crop_width, img_crop_height], [0, img_crop_height]]) M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective(img_crop, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE) + dst_img = cv2.warpPerspective( + img, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC) dst_img_height, dst_img_width = dst_img.shape[0:2] if dst_img_height * 1.0 / dst_img_width >= 1.5: dst_img = np.rot90(dst_img) return dst_img - def resize_norm_img(self, img, max_wh_ratio): + def resize_norm_img_rec(self, img, max_wh_ratio): imgC, imgH, imgW = self.rec_image_shape - imgW = int(32 * max_wh_ratio) + assert imgC == img.shape[2] + imgW = int((32 * max_wh_ratio)) + h, w = img.shape[:2] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def resize_norm_img_cls(self, img): + cls_image_shape = [3, 48, 192] + imgC, imgH, imgW = cls_image_shape h = img.shape[0] w = img.shape[1] ratio = w / float(h) @@ -131,7 +167,11 @@ class ChineseOCRDBCRNN(hub.Module): resized_w = int(math.ceil(imgH * ratio)) resized_image = cv2.resize(img, (resized_w, imgH)) resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 + if cls_image_shape[0] == 1: + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) / 255 resized_image -= 0.5 resized_image /= 0.5 padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) @@ -145,7 +185,8 @@ class ChineseOCRDBCRNN(hub.Module): output_dir='ocr_result', visualization=False, box_thresh=0.5, - text_thresh=0.5): + text_thresh=0.5, + angle_classification_thresh=0.9): """ Get the chinese texts in the predicted images. Args: @@ -156,7 +197,9 @@ class ChineseOCRDBCRNN(hub.Module): output_dir (str): The directory to store output images. visualization (bool): Whether to save image or not. 
box_thresh(float): the threshold of the detected text box's confidence - text_thresh(float): the threshold of the recognize chinese texts' confidence + text_thresh(float): the threshold of the chinese text recognition confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + Returns: res (list): The result of chinese texts and save path of images. """ @@ -182,12 +225,13 @@ class ChineseOCRDBCRNN(hub.Module): detection_results = self.text_detector_module.detect_text( images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) + boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] all_results = [] for index, img_boxes in enumerate(boxes): original_image = predicted_data[index].copy() result = {'save_path': ''} - if img_boxes is None: + if img_boxes.size == 0: result['data'] = [] else: img_crop_list = [] @@ -196,8 +240,10 @@ class ChineseOCRDBCRNN(hub.Module): tmp_box = copy.deepcopy(boxes[num_box]) img_crop = self.get_rotate_crop_image(original_image, tmp_box) img_crop_list.append(img_crop) - + img_crop_list, angle_list = self._classify_text( + img_crop_list, angle_classification_thresh=angle_classification_thresh) rec_results = self._recognize_text(img_crop_list) + # if the recognized text confidence score is lower than text_thresh, then drop it rec_res_final = [] for index, res in enumerate(rec_results): @@ -226,7 +272,14 @@ class ChineseOCRDBCRNN(hub.Module): results = self.recognize_text(images_decode, **kwargs) return results - def save_result_image(self, original_image, detection_boxes, rec_results, output_dir='ocr_result', text_thresh=0.5): + def save_result_image( + self, + original_image, + detection_boxes, + rec_results, + output_dir='ocr_result', + text_thresh=0.5, + ): image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) txts = [item[0] for item in rec_results] scores = [item[1] for item in rec_results] @@ -241,32 +294,84 @@ class ChineseOCRDBCRNN(hub.Module): cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) return save_file_path - def _recognize_text(self, image_list): - img_num = len(image_list) + def _classify_text(self, image_list, angle_classification_thresh=0.9): + img_list = copy.deepcopy(image_list) + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the cls process + indices = np.argsort(np.array(width_list)) + + cls_res = [['', 0.0]] * img_num batch_num = 30 - rec_res = [] - predict_time = 0 for beg_img_no in range(0, img_num, batch_num): end_img_no = min(img_num, beg_img_no + batch_num) norm_img_batch = [] max_wh_ratio = 0 for ino in range(beg_img_no, end_img_no): - h, w = image_list[ino].shape[0:2] - wh_ratio = w / h + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h max_wh_ratio = max(max_wh_ratio, wh_ratio) for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img(image_list[ino], max_wh_ratio) + norm_img = self.resize_norm_img_cls(img_list[indices[ino]]) norm_img = norm_img[np.newaxis, :] norm_img_batch.append(norm_img) norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = norm_img_batch.copy() - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.zero_copy_run() - rec_idx_batch = self.output_tensors[0].copy_to_cpu() - rec_idx_lod = self.output_tensors[0].lod()[0] - predict_batch = self.output_tensors[1].copy_to_cpu() - predict_lod = 
self.output_tensors[1].lod()[0] + self.cls_input_tensor.copy_from_cpu(norm_img_batch) + self.cls_predictor.zero_copy_run() + + prob_out = self.cls_output_tensors[0].copy_to_cpu() + label_out = self.cls_output_tensors[1].copy_to_cpu() + if len(label_out.shape) != 1: + prob_out, label_out = label_out, prob_out + label_list = ['0', '180'] + for rno in range(len(label_out)): + label_idx = label_out[rno] + score = prob_out[rno][label_idx] + label = label_list[label_idx] + cls_res[indices[beg_img_no + rno]] = [label, score] + if '180' in label and score > angle_classification_thresh: + img_list[indices[beg_img_no + rno]] = cv2.rotate(img_list[indices[beg_img_no + rno]], 1) + return img_list, cls_res + + def _recognize_text(self, img_list): + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the recognition process + indices = np.argsort(np.array(width_list)) + + rec_res = [['', 0.0]] * img_num + batch_num = 30 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img_rec(img_list[indices[ino]], max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + + norm_img_batch = np.concatenate(norm_img_batch, axis=0) + norm_img_batch = norm_img_batch.copy() + + self.rec_input_tensor.copy_from_cpu(norm_img_batch) + self.rec_predictor.zero_copy_run() + + rec_idx_batch = self.rec_output_tensors[0].copy_to_cpu() + rec_idx_lod = self.rec_output_tensors[0].lod()[0] + predict_batch = self.rec_output_tensors[1].copy_to_cpu() + predict_lod = self.rec_output_tensors[1].lod()[0] for rno in range(len(rec_idx_lod) - 1): beg = rec_idx_lod[rno] end = rec_idx_lod[rno + 1] @@ -281,14 +386,17 @@ class ChineseOCRDBCRNN(hub.Module): if len(valid_ind) == 0: continue score = np.mean(probs[valid_ind, ind[valid_ind]]) - rec_res.append([preds_text, score]) + # rec_res.append([preds_text, score]) + rec_res[indices[beg_img_no + rno]] = [preds_text, score] - return rec_res + return rec_res def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): detector_dir = os.path.join(dirname, 'text_detector') + classifier_dir = os.path.join(dirname, 'angle_classifier') recognizer_dir = os.path.join(dirname, 'text_recognizer') self._save_detector_model(detector_dir, model_filename, params_filename, combined) + self._save_classifier_model(classifier_dir, model_filename, params_filename, combined) self._save_recognizer_model(recognizer_dir, model_filename, params_filename, combined) logger.info("The inference model has been saved in the path {}".format(os.path.realpath(dirname))) @@ -302,10 +410,34 @@ class ChineseOCRDBCRNN(hub.Module): place = fluid.CPUPlace() exe = fluid.Executor(place) - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') + model_file_path = os.path.join(self.rec_pretrained_model_path, 'model') + params_file_path = os.path.join(self.rec_pretrained_model_path, 'params') + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.rec_pretrained_model_path, + 
model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + model_file_path = os.path.join(self.cls_pretrained_model_path, 'model') + params_file_path = os.path.join(self.cls_pretrained_model_path, 'params') program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, + dirname=self.cls_pretrained_model_path, model_filename=model_file_path, params_filename=params_file_path, executor=exe) @@ -363,7 +495,7 @@ class ChineseOCRDBCRNN(hub.Module): if __name__ == '__main__': ocr = ChineseOCRDBCRNN() image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' ] res = ocr.recognize_text(paths=image_path, visualization=True) diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py index 5574fbab..5a90b27d 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py @@ -162,8 +162,8 @@ def sorted_boxes(dt_boxes): _boxes = list(sorted_boxes) for i in range(num_boxes - 1): - if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): + if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ + (_boxes[i + 1][0][0] < _boxes[i][0][0]): tmp = _boxes[i] _boxes[i] = _boxes[i + 1] _boxes[i + 1] = tmp diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md b/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md index 389dbb71..ac20c01c 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md @@ -19,6 +19,15 @@ $ hub run chinese_ocr_db_crnn_server --input_path "/PATH/TO/IMAGE" ## API +### \_\_init\_\_(text_detector_module=None, enable_mkldnn=False) + +构造ChineseOCRDBCRNNServer对象 + +**参数** + +* text_detector_module(str): 文字检测PaddleHub Module名字,如设置为None,则默认使用[chinese_text_detection_db_server Module](https://www.paddlepaddle.org.cn/hubdetail?name=chinese_text_detection_db_server&en_category=TextRecognition)。其作用为检测图片当中的文本。 +* enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + ```python def recognize_text(images=[], paths=[], @@ -26,7 +35,8 @@ def recognize_text(images=[], output_dir='ocr_result', visualization=False, box_thresh=0.5, - text_thresh=0.5) + text_thresh=0.5, + angle_classification_thresh=0.9) ``` 预测API,检测输入图片中的所有中文文本的位置。 @@ -38,6 +48,7 @@ def recognize_text(images=[], * use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** * box\_thresh (float): 检测文本框置信度的阈值; * text\_thresh (float): 识别中文文本置信度的阈值; +* angle_classification_thresh(float): 文本角度分类置信度的阈值 * visualization (bool): 是否将识别结果保存为图片文件; * output\_dir (str): 
图片的保存路径,默认设为 ocr\_result; @@ -128,3 +139,11 @@ pyclipper * 1.0.0 初始发布 + +* 1.0.1 + + 支持mkldnn加速CPU计算 + +* 1.1.0 + + 使用三阶段模型(文本框检测-角度分类-文字识别)识别图片文字。 diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py index bf6d21f6..ad6b01ba 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py @@ -22,17 +22,23 @@ class CharacterOps(object): def __init__(self, config): self.character_type = config['character_type'] self.loss_type = config['loss_type'] + self.max_text_len = config['max_text_length'] if self.character_type == "en": self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) elif self.character_type == "ch": character_dict_path = config['character_dict_path'] + add_space = False + if 'use_space_char' in config: + add_space = config['use_space_char'] self.character_str = "" with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: - line = line.decode('utf-8').strip("\n") + line = line.decode('utf-8').strip("\n").strip("\r\n") self.character_str += line + if add_space: + self.character_str += " " dict_character = list(self.character_str) elif self.character_type == "en_sensitive": # same with ASTER setting (use 94 char). @@ -46,6 +52,8 @@ class CharacterOps(object): self.end_str = "eos" if self.loss_type == "attention": dict_character = [self.beg_str, self.end_str] + dict_character + elif self.loss_type == "srn": + dict_character = dict_character + [self.beg_str, self.end_str] self.dict = {} for i, char in enumerate(dict_character): self.dict[char] = i @@ -90,7 +98,7 @@ class CharacterOps(object): if is_remove_duplicate: if idx > 0 and text_index[idx - 1] == text_index[idx]: continue - char_list.append(self.character[text_index[idx]]) + char_list.append(self.character[int(text_index[idx])]) text = ''.join(char_list) return text @@ -134,6 +142,36 @@ def cal_predicts_accuracy(char_ops, preds, preds_lod, labels, labels_lod, is_rem return acc, acc_num, img_num +def cal_predicts_accuracy_srn(char_ops, preds, labels, max_text_len, is_debug=False): + acc_num = 0 + img_num = 0 + + char_num = char_ops.get_char_num() + + total_len = preds.shape[0] + img_num = int(total_len / max_text_len) + for i in range(img_num): + cur_label = [] + cur_pred = [] + for j in range(max_text_len): + if labels[j + i * max_text_len] != int(char_num - 1): #0 + cur_label.append(labels[j + i * max_text_len][0]) + else: + break + + for j in range(max_text_len + 1): + if j < len(cur_label) and preds[j + i * max_text_len][0] != cur_label[j]: + break + elif j == len(cur_label) and j == max_text_len: + acc_num += 1 + break + elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(char_num - 1): + acc_num += 1 + break + acc = acc_num * 1.0 / img_num + return acc, acc_num, img_num + + def convert_rec_attention_infer_res(preds): img_num = preds.shape[0] target_lod = [0] diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py index 2ffd632b..4a9e5a29 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py @@ -25,32 +25,44 @@ from chinese_ocr_db_crnn_server.utils import base64_to_cv2, draw_ocr, get_image_ @moduleinfo( name="chinese_ocr_db_crnn_server", - 
version="1.0.2", + version="1.1.0", summary= "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseOCRDBCRNNServer(hub.Module): - def _initialize(self, text_detector_module=None): + def _initialize(self, text_detector_module=None, enable_mkldnn=False): """ initialize with the necessary elements """ self.character_dict_path = os.path.join(self.directory, 'assets', 'ppocr_keys_v1.txt') - char_ops_params = {'character_type': 'ch', 'character_dict_path': self.character_dict_path, 'loss_type': 'ctc'} + char_ops_params = { + 'character_type': 'ch', + 'character_dict_path': self.character_dict_path, + 'loss_type': 'ctc', + 'max_text_length': 25, + 'use_space_char': True + } self.char_ops = CharacterOps(char_ops_params) self.rec_image_shape = [3, 32, 320] self._text_detector_module = text_detector_module self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') - self.pretrained_model_path = os.path.join(self.directory, 'assets', 'ch_rec_r34_vd_crnn') - self._set_config() + self.enable_mkldnn = enable_mkldnn - def _set_config(self): + self.rec_pretrained_model_path = os.path.join(self.directory, 'inference_model', 'character_rec') + self.cls_pretrained_model_path = os.path.join(self.directory, 'inference_model', 'angle_cls') + self.rec_predictor, self.rec_input_tensor, self.rec_output_tensors = self._set_config( + self.rec_pretrained_model_path) + self.cls_predictor, self.cls_input_tensor, self.cls_output_tensors = self._set_config( + self.cls_pretrained_model_path) + + def _set_config(self, pretrained_model_path): """ - predictor config setting + predictor config path """ - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') + model_file_path = os.path.join(pretrained_model_path, 'model') + params_file_path = os.path.join(pretrained_model_path, 'params') config = AnalysisConfig(model_file_path, params_file_path) try: @@ -64,20 +76,26 @@ class ChineseOCRDBCRNNServer(hub.Module): config.enable_use_gpu(8000, 0) else: config.disable_gpu() + if self.enable_mkldnn: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() config.disable_glog_info() - - # use zero copy config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) - input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) - output_names = self.predictor.get_output_names() - self.output_tensors = [] + + predictor = create_paddle_predictor(config) + + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_tensor(input_names[0]) + output_names = predictor.get_output_names() + output_tensors = [] for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) - self.output_tensors.append(output_tensor) + output_tensor = predictor.get_output_tensor(output_name) + output_tensors.append(output_tensor) + + return predictor, input_tensor, output_tensors @property def text_detector_module(self): @@ -85,7 +103,8 @@ class ChineseOCRDBCRNNServer(hub.Module): text detect module """ if not self._text_detector_module: - self._text_detector_module 
= hub.Module(name='chinese_text_detection_db_server') + self._text_detector_module = hub.Module( + name='chinese_text_detection_db_server', enable_mkldnn=self.enable_mkldnn, version='1.0.2') return self._text_detector_module def read_images(self, paths=[]): @@ -100,6 +119,7 @@ class ChineseOCRDBCRNNServer(hub.Module): return images def get_rotate_crop_image(self, img, points): + ''' img_height, img_width = img.shape[0:2] left = int(np.min(points[:, 0])) right = int(np.max(points[:, 0])) @@ -108,20 +128,40 @@ class ChineseOCRDBCRNNServer(hub.Module): img_crop = img[top:bottom, left:right, :].copy() points[:, 0] = points[:, 0] - left points[:, 1] = points[:, 1] - top - img_crop_width = int(np.linalg.norm(points[0] - points[1])) - img_crop_height = int(np.linalg.norm(points[0] - points[3])) - pts_std = np.float32([[0, 0], [img_crop_width, 0],\ - [img_crop_width, img_crop_height], [0, img_crop_height]]) + ''' + img_crop_width = int(max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3]))) + img_crop_height = int(max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]))) + pts_std = np.float32([[0, 0], [img_crop_width, 0], [img_crop_width, img_crop_height], [0, img_crop_height]]) M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective(img_crop, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE) + dst_img = cv2.warpPerspective( + img, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC) dst_img_height, dst_img_width = dst_img.shape[0:2] if dst_img_height * 1.0 / dst_img_width >= 1.5: dst_img = np.rot90(dst_img) return dst_img - def resize_norm_img(self, img, max_wh_ratio): + def resize_norm_img_rec(self, img, max_wh_ratio): imgC, imgH, imgW = self.rec_image_shape - imgW = int(32 * max_wh_ratio) + assert imgC == img.shape[2] + imgW = int((32 * max_wh_ratio)) + h, w = img.shape[:2] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def resize_norm_img_cls(self, img): + cls_image_shape = [3, 48, 192] + imgC, imgH, imgW = cls_image_shape h = img.shape[0] w = img.shape[1] ratio = w / float(h) @@ -131,7 +171,11 @@ class ChineseOCRDBCRNNServer(hub.Module): resized_w = int(math.ceil(imgH * ratio)) resized_image = cv2.resize(img, (resized_w, imgH)) resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 + if cls_image_shape[0] == 1: + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) / 255 resized_image -= 0.5 resized_image /= 0.5 padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) @@ -145,7 +189,8 @@ class ChineseOCRDBCRNNServer(hub.Module): output_dir='ocr_result', visualization=False, box_thresh=0.5, - text_thresh=0.5): + text_thresh=0.5, + angle_classification_thresh=0.9): """ Get the chinese texts in the predicted images. Args: @@ -156,7 +201,9 @@ class ChineseOCRDBCRNNServer(hub.Module): output_dir (str): The directory to store output images. visualization (bool): Whether to save image or not. 
box_thresh(float): the threshold of the detected text box's confidence - text_thresh(float): the threshold of the recognize chinese texts' confidence + text_thresh(float): the threshold of the chinese text recognition confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + Returns: res (list): The result of chinese texts and save path of images. """ @@ -182,12 +229,13 @@ class ChineseOCRDBCRNNServer(hub.Module): detection_results = self.text_detector_module.detect_text( images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) + boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] all_results = [] for index, img_boxes in enumerate(boxes): original_image = predicted_data[index].copy() result = {'save_path': ''} - if img_boxes is None: + if img_boxes.size == 0: result['data'] = [] else: img_crop_list = [] @@ -196,8 +244,10 @@ class ChineseOCRDBCRNNServer(hub.Module): tmp_box = copy.deepcopy(boxes[num_box]) img_crop = self.get_rotate_crop_image(original_image, tmp_box) img_crop_list.append(img_crop) - + img_crop_list, angle_list = self._classify_text( + img_crop_list, angle_classification_thresh=angle_classification_thresh) rec_results = self._recognize_text(img_crop_list) + # if the recognized text confidence score is lower than text_thresh, then drop it rec_res_final = [] for index, res in enumerate(rec_results): @@ -226,7 +276,14 @@ class ChineseOCRDBCRNNServer(hub.Module): results = self.recognize_text(images_decode, **kwargs) return results - def save_result_image(self, original_image, detection_boxes, rec_results, output_dir='ocr_result', text_thresh=0.5): + def save_result_image( + self, + original_image, + detection_boxes, + rec_results, + output_dir='ocr_result', + text_thresh=0.5, + ): image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) txts = [item[0] for item in rec_results] scores = [item[1] for item in rec_results] @@ -241,32 +298,84 @@ class ChineseOCRDBCRNNServer(hub.Module): cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) return save_file_path - def _recognize_text(self, image_list): - img_num = len(image_list) + def _classify_text(self, image_list, angle_classification_thresh=0.9): + img_list = copy.deepcopy(image_list) + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the cls process + indices = np.argsort(np.array(width_list)) + + cls_res = [['', 0.0]] * img_num batch_num = 30 - rec_res = [] - predict_time = 0 for beg_img_no in range(0, img_num, batch_num): end_img_no = min(img_num, beg_img_no + batch_num) norm_img_batch = [] max_wh_ratio = 0 for ino in range(beg_img_no, end_img_no): - h, w = image_list[ino].shape[0:2] - wh_ratio = w / h + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h max_wh_ratio = max(max_wh_ratio, wh_ratio) for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img(image_list[ino], max_wh_ratio) + norm_img = self.resize_norm_img_cls(img_list[indices[ino]]) norm_img = norm_img[np.newaxis, :] norm_img_batch.append(norm_img) norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = norm_img_batch.copy() - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.zero_copy_run() - rec_idx_batch = self.output_tensors[0].copy_to_cpu() - rec_idx_lod = self.output_tensors[0].lod()[0] - predict_batch = self.output_tensors[1].copy_to_cpu() - predict_lod = 
self.output_tensors[1].lod()[0] + self.cls_input_tensor.copy_from_cpu(norm_img_batch) + self.cls_predictor.zero_copy_run() + + prob_out = self.cls_output_tensors[0].copy_to_cpu() + label_out = self.cls_output_tensors[1].copy_to_cpu() + if len(label_out.shape) != 1: + prob_out, label_out = label_out, prob_out + label_list = ['0', '180'] + for rno in range(len(label_out)): + label_idx = label_out[rno] + score = prob_out[rno][label_idx] + label = label_list[label_idx] + cls_res[indices[beg_img_no + rno]] = [label, score] + if '180' in label and score > angle_classification_thresh: + img_list[indices[beg_img_no + rno]] = cv2.rotate(img_list[indices[beg_img_no + rno]], 1) + return img_list, cls_res + + def _recognize_text(self, img_list): + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the recognition process + indices = np.argsort(np.array(width_list)) + + rec_res = [['', 0.0]] * img_num + batch_num = 30 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img_rec(img_list[indices[ino]], max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + + norm_img_batch = np.concatenate(norm_img_batch, axis=0) + norm_img_batch = norm_img_batch.copy() + + self.rec_input_tensor.copy_from_cpu(norm_img_batch) + self.rec_predictor.zero_copy_run() + + rec_idx_batch = self.rec_output_tensors[0].copy_to_cpu() + rec_idx_lod = self.rec_output_tensors[0].lod()[0] + predict_batch = self.rec_output_tensors[1].copy_to_cpu() + predict_lod = self.rec_output_tensors[1].lod()[0] for rno in range(len(rec_idx_lod) - 1): beg = rec_idx_lod[rno] end = rec_idx_lod[rno + 1] @@ -281,14 +390,17 @@ class ChineseOCRDBCRNNServer(hub.Module): if len(valid_ind) == 0: continue score = np.mean(probs[valid_ind, ind[valid_ind]]) - rec_res.append([preds_text, score]) + # rec_res.append([preds_text, score]) + rec_res[indices[beg_img_no + rno]] = [preds_text, score] - return rec_res + return rec_res def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): detector_dir = os.path.join(dirname, 'text_detector') + classifier_dir = os.path.join(dirname, 'angle_classifier') recognizer_dir = os.path.join(dirname, 'text_recognizer') self._save_detector_model(detector_dir, model_filename, params_filename, combined) + self._save_classifier_model(classifier_dir, model_filename, params_filename, combined) self._save_recognizer_model(recognizer_dir, model_filename, params_filename, combined) logger.info("The inference model has been saved in the path {}".format(os.path.realpath(dirname))) @@ -302,10 +414,34 @@ class ChineseOCRDBCRNNServer(hub.Module): place = fluid.CPUPlace() exe = fluid.Executor(place) - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') + model_file_path = os.path.join(self.rec_pretrained_model_path, 'model') + params_file_path = os.path.join(self.rec_pretrained_model_path, 'params') + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.rec_pretrained_model_path, + 
model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + model_file_path = os.path.join(self.cls_pretrained_model_path, 'model') + params_file_path = os.path.join(self.cls_pretrained_model_path, 'params') program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, + dirname=self.cls_pretrained_model_path, model_filename=model_file_path, params_filename=params_file_path, executor=exe) @@ -361,8 +497,7 @@ class ChineseOCRDBCRNNServer(hub.Module): if __name__ == '__main__': - ocr = ChineseOCRDBCRNNServer() - print(ocr.name) + ocr = ChineseOCRDBCRNNServer(enable_mkldnn=False) image_path = [ '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py index 5574fbab..5a90b27d 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py @@ -162,8 +162,8 @@ def sorted_boxes(dt_boxes): _boxes = list(sorted_boxes) for i in range(num_boxes - 1): - if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): + if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ + (_boxes[i + 1][0][0] < _boxes[i][0][0]): tmp = _boxes[i] _boxes[i] = _boxes[i + 1] _boxes[i + 1] = tmp diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md b/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md index 8dd066c5..9ec79fbe 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md @@ -19,6 +19,17 @@ $ hub run chinese_text_detection_db_mobile --input_path "/PATH/TO/IMAGE" ## API +## API + +### \_\_init\_\_(enable_mkldnn=False) + +构造ChineseTextDetectionDB对象 + +**参数** + +* enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + + ```python def detect_text(paths=[], images=[], @@ -51,7 +62,7 @@ def detect_text(paths=[], import paddlehub as hub import cv2 -text_detector = hub.Module(name="chinese_text_detection_db_mobile") +text_detector = hub.Module(name="chinese_text_detection_db_mobile", enable_mkldnn=True) result = text_detector.detect_text(images=[cv2.imread('/PATH/TO/IMAGE')]) # or @@ -121,3 +132,15 @@ pyclipper * 1.0.1 修复使用在线服务调用模型失败问题 + +* 1.0.2 + + 支持mkldnn加速CPU计算 + +* 1.0.3 + + 增加更多预训练数据,更新预训练参数 + +1.1.0 + +使用超轻量级的三阶段模型(文本框检测-角度分类-文字识别)识别图片文字。 diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py index 3cebc67f..14fd6137 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py +++ 
b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py @@ -29,18 +29,20 @@ def base64_to_cv2(b64str): @moduleinfo( name="chinese_text_detection_db_mobile", - version="1.0.1", + version="1.0.3", summary= "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseTextDetectionDB(hub.Module): - def _initialize(self): + def _initialize(self, enable_mkldnn=False): """ initialize with the necessary elements """ self.pretrained_model_path = os.path.join(self.directory, 'inference_model') + self.enable_mkldnn = enable_mkldnn + self._set_config() def check_requirements(self): @@ -70,6 +72,11 @@ class ChineseTextDetectionDB(hub.Module): config.enable_use_gpu(8000, 0) else: config.disable_gpu() + config.set_cpu_math_library_num_threads(6) + if self.enable_mkldnn: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() config.disable_glog_info() @@ -96,19 +103,18 @@ class ChineseTextDetectionDB(hub.Module): images.append(img) return images + def clip_det_res(self, points, img_height, img_width): + for pno in range(points.shape[0]): + points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) + points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) + return points + def filter_tag_det_res(self, dt_boxes, image_shape): img_height, img_width = image_shape[0:2] dt_boxes_new = [] for box in dt_boxes: box = self.order_points_clockwise(box) - left = int(np.min(box[:, 0])) - right = int(np.max(box[:, 0])) - top = int(np.min(box[:, 1])) - bottom = int(np.max(box[:, 1])) - bbox_height = bottom - top - bbox_width = right - left - diffh = math.fabs(box[0, 1] - box[1, 1]) - diffw = math.fabs(box[0, 0] - box[3, 0]) + box = self.clip_det_res(box, img_height, img_width) rect_width = int(np.linalg.norm(box[0] - box[1])) rect_height = int(np.linalg.norm(box[0] - box[3])) if rect_width <= 10 or rect_height <= 10: @@ -162,7 +168,7 @@ class ChineseTextDetectionDB(hub.Module): """ self.check_requirements() - from chinese_text_detection_db_mobile.processor import DBPreProcess, DBPostProcess, draw_boxes, get_image_ext + from chinese_text_detection_db_mobile.processor import DBProcessTest, DBPostProcess, draw_boxes, get_image_ext if use_gpu: try: @@ -182,13 +188,19 @@ class ChineseTextDetectionDB(hub.Module): assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
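The clip_det_res helper introduced above clamps detected box corners to the image bounds before small boxes are filtered out; a standalone toy check (the function body is copied here for illustration; in the module it is a method of ChineseTextDetectionDB):

```python
import numpy as np

def clip_det_res(points, img_height, img_width):
    # Clamp each corner into [0, img_width - 1] x [0, img_height - 1].
    for pno in range(points.shape[0]):
        points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
        points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
    return points

box = np.array([[-5.0, 10.0], [700.0, 10.0], [700.0, 40.0], [-5.0, 40.0]])
print(clip_det_res(box, img_height=480, img_width=640))
# -> corners clamped to the 640x480 frame before the 10-pixel size filter runs
```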
- preprocessor = DBPreProcess() - postprocessor = DBPostProcess(box_thresh) + preprocessor = DBProcessTest(params={'max_side_len': 960}) + postprocessor = DBPostProcess(params={ + 'thresh': 0.3, + 'box_thresh': 0.5, + 'max_candidates': 1000, + 'unclip_ratio': 2.0 + }) all_imgs = [] all_ratios = [] all_results = [] for original_image in predicted_data: + ori_im = original_image.copy() im, ratio_list = preprocessor(original_image) res = {'save_path': ''} if im is None: @@ -196,11 +208,20 @@ class ChineseTextDetectionDB(hub.Module): else: im = im.copy() - starttime = time.time() self.input_tensor.copy_from_cpu(im) self.predictor.zero_copy_run() - data_out = self.output_tensors[0].copy_to_cpu() - dt_boxes_list = postprocessor(data_out, [ratio_list]) + + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + + outs_dict = {} + outs_dict['maps'] = outputs[0] + + # data_out = self.output_tensors[0].copy_to_cpu() + dt_boxes_list = postprocessor(outs_dict, [ratio_list]) + dt_boxes = dt_boxes_list[0] boxes = self.filter_tag_det_res(dt_boxes_list[0], original_image.shape) res['data'] = boxes.astype(np.int).tolist() @@ -298,7 +319,7 @@ class ChineseTextDetectionDB(hub.Module): if __name__ == '__main__': db = ChineseTextDetectionDB() image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' ] res = db.detect_text(paths=image_path, visualization=True) diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py index a213f1ef..5c6df83d 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py @@ -12,25 +12,43 @@ import numpy as np import pyclipper -class DBPreProcess(object): - def __init__(self, max_side_len=960): - self.max_side_len = max_side_len +class DBProcessTest(object): + """ + DB pre-process for Test mode + """ + + def __init__(self, params): + super(DBProcessTest, self).__init__() + self.resize_type = 0 + if 'test_image_shape' in params: + self.image_shape = params['test_image_shape'] + # print(self.image_shape) + self.resize_type = 1 + if 'max_side_len' in params: + self.max_side_len = params['max_side_len'] + else: + self.max_side_len = 2400 - def resize_image_type(self, im): + def resize_image_type0(self, im): """ resize image to a size multiple of 32 which is required by the network + args: + img(array): array with shape [h, w, c] + return(tuple): + img, (ratio_h, ratio_w) """ + max_side_len = self.max_side_len h, w, _ = im.shape resize_w = w resize_h = h # limit the max side - if max(resize_h, resize_w) > self.max_side_len: + if max(resize_h, resize_w) > max_side_len: if resize_h > resize_w: - ratio = float(self.max_side_len) / resize_h + ratio = float(max_side_len) / resize_h else: - ratio = float(self.max_side_len) / resize_w + ratio = float(max_side_len) / resize_w else: ratio = 1. 
resize_h = int(resize_h * ratio) @@ -58,19 +76,34 @@ class DBPreProcess(object): ratio_w = resize_w / float(w) return im, (ratio_h, ratio_w) + def resize_image_type1(self, im): + resize_h, resize_w = self.image_shape + ori_h, ori_w = im.shape[:2] # (h, w, c) + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + return im, (ratio_h, ratio_w) + def normalize(self, im): img_mean = [0.485, 0.456, 0.406] img_std = [0.229, 0.224, 0.225] im = im.astype(np.float32, copy=False) im = im / 255 - im -= img_mean - im /= img_std + im[:, :, 0] -= img_mean[0] + im[:, :, 1] -= img_mean[1] + im[:, :, 2] -= img_mean[2] + im[:, :, 0] /= img_std[0] + im[:, :, 1] /= img_std[1] + im[:, :, 2] /= img_std[2] channel_swap = (2, 0, 1) im = im.transpose(channel_swap) return im def __call__(self, im): - im, (ratio_h, ratio_w) = self.resize_image_type(im) + if self.resize_type == 0: + im, (ratio_h, ratio_w) = self.resize_image_type0(im) + else: + im, (ratio_h, ratio_w) = self.resize_image_type1(im) im = self.normalize(im) im = im[np.newaxis, :] return [im, (ratio_h, ratio_w)] @@ -81,10 +114,11 @@ class DBPostProcess(object): The post process for Differentiable Binarization (DB). """ - def __init__(self, thresh=0.3, box_thresh=0.5, max_candidates=1000): - self.thresh = thresh - self.box_thresh = box_thresh - self.max_candidates = max_candidates + def __init__(self, params): + self.thresh = params['thresh'] + self.box_thresh = params['box_thresh'] + self.max_candidates = params['max_candidates'] + self.unclip_ratio = params['unclip_ratio'] self.min_size = 3 def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): @@ -131,7 +165,8 @@ class DBPostProcess(object): scores[index] = score return boxes, scores - def unclip(self, box, unclip_ratio=2.0): + def unclip(self, box): + unclip_ratio = self.unclip_ratio poly = Polygon(box) distance = poly.area * unclip_ratio / poly.length offset = pyclipper.PyclipperOffset() @@ -174,8 +209,10 @@ class DBPostProcess(object): cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - def __call__(self, predictions, ratio_list): - pred = predictions[:, 0, :, :] + def __call__(self, outs_dict, ratio_list): + pred = outs_dict['maps'] + + pred = pred[:, 0, :, :] segmentation = pred > self.thresh boxes_batch = [] diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/README.md b/modules/image/text_recognition/chinese_text_detection_db_server/README.md index 37a70e19..75618083 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_server/README.md +++ b/modules/image/text_recognition/chinese_text_detection_db_server/README.md @@ -19,6 +19,14 @@ $ hub run chinese_text_detection_db_server --input_path "/PATH/TO/IMAGE" ## API +### \_\_init\_\_(enable_mkldnn=False) + +构造ChineseTextDetectionDBServer对象 + +**参数** + +* enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + ```python def detect_text(paths=[], images=[], @@ -117,3 +125,11 @@ pyclipper * 1.0.0 初始发布 + +* 1.0.2 + + 支持mkldnn加速CPU计算 + +* 1.0.3 + + 增加更多预训练数据,更新预训练参数 diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/module.py b/modules/image/text_recognition/chinese_text_detection_db_server/module.py index 60c54eb6..91ac7f32 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_server/module.py +++ b/modules/image/text_recognition/chinese_text_detection_db_server/module.py @@ -29,18 +29,20 @@ def 
base64_to_cv2(b64str): @moduleinfo( name="chinese_text_detection_db_server", - version="1.0.0", + version="1.0.2", summary= "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseTextDetectionDBServer(hub.Module): - def _initialize(self): + def _initialize(self, enable_mkldnn=False): """ initialize with the necessary elements """ - self.pretrained_model_path = os.path.join(self.directory, 'ch_det_r50_vd_db') + self.pretrained_model_path = os.path.join(self.directory, 'inference_model') + self.enable_mkldnn = enable_mkldnn + self._set_config() def check_requirements(self): @@ -70,6 +72,8 @@ class ChineseTextDetectionDBServer(hub.Module): config.enable_use_gpu(8000, 0) else: config.disable_gpu() + if self.enable_mkldnn: + config.enable_mkldnn() config.disable_glog_info() diff --git a/modules/text/language_model/lda_news/document.py b/modules/text/language_model/lda_news/document.py index 98eae505..4476230a 100644 --- a/modules/text/language_model/lda_news/document.py +++ b/modules/text/language_model/lda_news/document.py @@ -5,6 +5,7 @@ class Topic(object): """Basic data structure of topic, contains topic id and corresponding probability. """ + def __init__(self, tid, prob): self.tid = tid # topic id self.prob = prob # topic probability @@ -14,6 +15,7 @@ class Token(object): """Basic storage unit of LDA documents, contains word id and corresponding topic. """ + def __init__(self, topic, id): self.topic = topic self.id = id @@ -23,6 +25,7 @@ class Sentence(object): """Basic storage unit of SentenceLDA documents, contains word ids of the sentence and its corresponding topic id. """ + def __init__(self, topic, tokens): self.topic = topic self.tokens = tokens @@ -31,6 +34,7 @@ class Sentence(object): class LDADoc(object): """The storage structure of LDA model's inference result. """ + def __init__(self): self._num_topics = None # Number of topics. self._num_accum = None # Number of accumulated sample rounds. @@ -116,8 +120,8 @@ class LDADoc(object): dense_dist = np.zeros(self._num_topics) if self.size() == 0: return dense_dist - dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (self.size() + - self._alpha * self._num_topics) + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) return dense_dist def accumulate_topic_num(self): @@ -129,6 +133,7 @@ class SLDADoc(LDADoc): """Sentence LDA Document, inherited from LDADoc. Add add_sentence interface. """ + def __init__(self): super().__init__() self.__sentences = None diff --git a/modules/text/language_model/lda_news/model.py b/modules/text/language_model/lda_news/model.py index 11f186d4..3ef089f9 100644 --- a/modules/text/language_model/lda_news/model.py +++ b/modules/text/language_model/lda_news/model.py @@ -11,6 +11,7 @@ from lda_news.vocab import Vocab, WordCount class TopicModel(object): """Storage Structure of Topic model, including vocabulary and word topic count. 
""" + def __init__(self, model_dir, config): """ Args: diff --git a/modules/text/language_model/lda_news/module.py b/modules/text/language_model/lda_news/module.py index 1a0e5f8f..6066ce0d 100644 --- a/modules/text/language_model/lda_news/module.py +++ b/modules/text/language_model/lda_news/module.py @@ -105,9 +105,8 @@ class TopicModel(hub.Module): wd = WordAndDis() wd.word = word sm = SemanticMatching() - wd.distance = sm.likelihood_based_similarity(terms=[word], - doc_topic_dist=doc_topic_dist, - model=self.__engine.get_model()) + wd.distance = sm.likelihood_based_similarity( + terms=[word], doc_topic_dist=doc_topic_dist, model=self.__engine.get_model()) items.append(wd) def take_elem(word_dis): diff --git a/modules/text/language_model/lda_news/tokenizer.py b/modules/text/language_model/lda_news/tokenizer.py index c07b3c4d..e59037d8 100644 --- a/modules/text/language_model/lda_news/tokenizer.py +++ b/modules/text/language_model/lda_news/tokenizer.py @@ -5,6 +5,7 @@ class Tokenizer(object): """Base tokenizer class. """ + def __init__(self): pass @@ -18,6 +19,7 @@ class SimpleTokenizer(Tokenizer): Notes: This tokenizer can only recognize the words in the corresponding vocab file. """ + def __init__(self, vocab_path): super().__init__() self.__max_word_len = 0 diff --git a/modules/text/language_model/lda_news/util.py b/modules/text/language_model/lda_news/util.py index e589602d..9f1ebdab 100644 --- a/modules/text/language_model/lda_news/util.py +++ b/modules/text/language_model/lda_news/util.py @@ -46,6 +46,7 @@ def rand_k(k): def timeit(f): """Return time cost of function f. """ + def timed(*args, **kwargs): start_time = time.time() result = f(*args, **kwargs) diff --git a/modules/text/language_model/lda_news/vose_alias.py b/modules/text/language_model/lda_news/vose_alias.py index be80ee8d..e4f158b6 100644 --- a/modules/text/language_model/lda_news/vose_alias.py +++ b/modules/text/language_model/lda_news/vose_alias.py @@ -6,6 +6,7 @@ from lda_news.util import rand, rand_k class VoseAlias(object): """Vose's Alias Method. """ + def __init__(self): self.__alias = None self.__prob = None # np.array diff --git a/modules/text/language_model/lda_novel/document.py b/modules/text/language_model/lda_novel/document.py index 98eae505..4476230a 100644 --- a/modules/text/language_model/lda_novel/document.py +++ b/modules/text/language_model/lda_novel/document.py @@ -5,6 +5,7 @@ class Topic(object): """Basic data structure of topic, contains topic id and corresponding probability. """ + def __init__(self, tid, prob): self.tid = tid # topic id self.prob = prob # topic probability @@ -14,6 +15,7 @@ class Token(object): """Basic storage unit of LDA documents, contains word id and corresponding topic. """ + def __init__(self, topic, id): self.topic = topic self.id = id @@ -23,6 +25,7 @@ class Sentence(object): """Basic storage unit of SentenceLDA documents, contains word ids of the sentence and its corresponding topic id. """ + def __init__(self, topic, tokens): self.topic = topic self.tokens = tokens @@ -31,6 +34,7 @@ class Sentence(object): class LDADoc(object): """The storage structure of LDA model's inference result. """ + def __init__(self): self._num_topics = None # Number of topics. self._num_accum = None # Number of accumulated sample rounds. 
@@ -116,8 +120,8 @@ class LDADoc(object): dense_dist = np.zeros(self._num_topics) if self.size() == 0: return dense_dist - dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (self.size() + - self._alpha * self._num_topics) + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) return dense_dist def accumulate_topic_num(self): @@ -129,6 +133,7 @@ class SLDADoc(LDADoc): """Sentence LDA Document, inherited from LDADoc. Add add_sentence interface. """ + def __init__(self): super().__init__() self.__sentences = None diff --git a/modules/text/language_model/lda_novel/model.py b/modules/text/language_model/lda_novel/model.py index 47b0c4ec..f16962be 100644 --- a/modules/text/language_model/lda_novel/model.py +++ b/modules/text/language_model/lda_novel/model.py @@ -11,6 +11,7 @@ from lda_novel.vocab import Vocab, WordCount class TopicModel(object): """Storage Structure of Topic model, including vocabulary and word topic count. """ + def __init__(self, model_dir, config): """ Args: diff --git a/modules/text/language_model/lda_novel/module.py b/modules/text/language_model/lda_novel/module.py index c10d4489..ed211ac4 100644 --- a/modules/text/language_model/lda_novel/module.py +++ b/modules/text/language_model/lda_novel/module.py @@ -105,9 +105,8 @@ class TopicModel(hub.Module): wd = WordAndDis() wd.word = word sm = SemanticMatching() - wd.distance = sm.likelihood_based_similarity(terms=[word], - doc_topic_dist=doc_topic_dist, - model=self.__engine.get_model()) + wd.distance = sm.likelihood_based_similarity( + terms=[word], doc_topic_dist=doc_topic_dist, model=self.__engine.get_model()) items.append(wd) def take_elem(word_dis): diff --git a/modules/text/language_model/lda_novel/tokenizer.py b/modules/text/language_model/lda_novel/tokenizer.py index 1d9afabc..585aed88 100644 --- a/modules/text/language_model/lda_novel/tokenizer.py +++ b/modules/text/language_model/lda_novel/tokenizer.py @@ -7,6 +7,7 @@ from paddlehub.common.logger import logger class Tokenizer(object): """Base tokenizer class. """ + def __init__(self): pass @@ -20,6 +21,7 @@ class SimpleTokenizer(Tokenizer): Notes: This tokenizer can only recognize the words in the corresponding vocab file. """ + def __init__(self, vocab_path): super().__init__() self.__max_word_len = 0 diff --git a/modules/text/language_model/lda_novel/util.py b/modules/text/language_model/lda_novel/util.py index 4b781825..fd294308 100644 --- a/modules/text/language_model/lda_novel/util.py +++ b/modules/text/language_model/lda_novel/util.py @@ -46,6 +46,7 @@ def rand_k(k): def timeit(f): """Return time cost of function f. """ + def timed(*args, **kwargs): start_time = time.time() result = f(*args, **kwargs) diff --git a/modules/text/language_model/lda_novel/vose_alias.py b/modules/text/language_model/lda_novel/vose_alias.py index 4bb7dbb6..ab9ba908 100644 --- a/modules/text/language_model/lda_novel/vose_alias.py +++ b/modules/text/language_model/lda_novel/vose_alias.py @@ -9,6 +9,7 @@ from lda_novel.util import rand, rand_k class VoseAlias(object): """Vose's Alias Method. 
""" + def __init__(self): self.__alias = None self.__prob = None # np.array diff --git a/modules/text/language_model/lda_webpage/document.py b/modules/text/language_model/lda_webpage/document.py index 98eae505..4476230a 100644 --- a/modules/text/language_model/lda_webpage/document.py +++ b/modules/text/language_model/lda_webpage/document.py @@ -5,6 +5,7 @@ class Topic(object): """Basic data structure of topic, contains topic id and corresponding probability. """ + def __init__(self, tid, prob): self.tid = tid # topic id self.prob = prob # topic probability @@ -14,6 +15,7 @@ class Token(object): """Basic storage unit of LDA documents, contains word id and corresponding topic. """ + def __init__(self, topic, id): self.topic = topic self.id = id @@ -23,6 +25,7 @@ class Sentence(object): """Basic storage unit of SentenceLDA documents, contains word ids of the sentence and its corresponding topic id. """ + def __init__(self, topic, tokens): self.topic = topic self.tokens = tokens @@ -31,6 +34,7 @@ class Sentence(object): class LDADoc(object): """The storage structure of LDA model's inference result. """ + def __init__(self): self._num_topics = None # Number of topics. self._num_accum = None # Number of accumulated sample rounds. @@ -116,8 +120,8 @@ class LDADoc(object): dense_dist = np.zeros(self._num_topics) if self.size() == 0: return dense_dist - dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (self.size() + - self._alpha * self._num_topics) + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) return dense_dist def accumulate_topic_num(self): @@ -129,6 +133,7 @@ class SLDADoc(LDADoc): """Sentence LDA Document, inherited from LDADoc. Add add_sentence interface. """ + def __init__(self): super().__init__() self.__sentences = None diff --git a/modules/text/language_model/lda_webpage/model.py b/modules/text/language_model/lda_webpage/model.py index 58fd16e0..8c05da14 100644 --- a/modules/text/language_model/lda_webpage/model.py +++ b/modules/text/language_model/lda_webpage/model.py @@ -11,6 +11,7 @@ from lda_webpage.vocab import Vocab, WordCount class TopicModel(object): """Storage Structure of Topic model, including vocabulary and word topic count. """ + def __init__(self, model_dir, config): """ Args: diff --git a/modules/text/language_model/lda_webpage/module.py b/modules/text/language_model/lda_webpage/module.py index 0603e952..ebe1da43 100644 --- a/modules/text/language_model/lda_webpage/module.py +++ b/modules/text/language_model/lda_webpage/module.py @@ -105,9 +105,8 @@ class TopicModel(hub.Module): wd = WordAndDis() wd.word = word sm = SemanticMatching() - wd.distance = sm.likelihood_based_similarity(terms=[word], - doc_topic_dist=doc_topic_dist, - model=self.__engine.get_model()) + wd.distance = sm.likelihood_based_similarity( + terms=[word], doc_topic_dist=doc_topic_dist, model=self.__engine.get_model()) items.append(wd) def take_elem(word_dis): diff --git a/modules/text/language_model/lda_webpage/tokenizer.py b/modules/text/language_model/lda_webpage/tokenizer.py index 1d9afabc..585aed88 100644 --- a/modules/text/language_model/lda_webpage/tokenizer.py +++ b/modules/text/language_model/lda_webpage/tokenizer.py @@ -7,6 +7,7 @@ from paddlehub.common.logger import logger class Tokenizer(object): """Base tokenizer class. """ + def __init__(self): pass @@ -20,6 +21,7 @@ class SimpleTokenizer(Tokenizer): Notes: This tokenizer can only recognize the words in the corresponding vocab file. 
""" + def __init__(self, vocab_path): super().__init__() self.__max_word_len = 0 diff --git a/modules/text/language_model/lda_webpage/util.py b/modules/text/language_model/lda_webpage/util.py index edd5923b..09892ee7 100644 --- a/modules/text/language_model/lda_webpage/util.py +++ b/modules/text/language_model/lda_webpage/util.py @@ -46,6 +46,7 @@ def rand_k(k): def timeit(f): """Return time cost of function f. """ + def timed(*args, **kwargs): start_time = time.time() result = f(*args, **kwargs) diff --git a/modules/text/language_model/lda_webpage/vose_alias.py b/modules/text/language_model/lda_webpage/vose_alias.py index 66ad348a..f722c692 100644 --- a/modules/text/language_model/lda_webpage/vose_alias.py +++ b/modules/text/language_model/lda_webpage/vose_alias.py @@ -9,6 +9,7 @@ from lda_webpage.util import rand, rand_k class VoseAlias(object): """Vose's Alias Method. """ + def __init__(self): self.__alias = None self.__prob = None # np.array diff --git a/modules/text/language_model/slda_news/document.py b/modules/text/language_model/slda_news/document.py index 98eae505..4476230a 100644 --- a/modules/text/language_model/slda_news/document.py +++ b/modules/text/language_model/slda_news/document.py @@ -5,6 +5,7 @@ class Topic(object): """Basic data structure of topic, contains topic id and corresponding probability. """ + def __init__(self, tid, prob): self.tid = tid # topic id self.prob = prob # topic probability @@ -14,6 +15,7 @@ class Token(object): """Basic storage unit of LDA documents, contains word id and corresponding topic. """ + def __init__(self, topic, id): self.topic = topic self.id = id @@ -23,6 +25,7 @@ class Sentence(object): """Basic storage unit of SentenceLDA documents, contains word ids of the sentence and its corresponding topic id. """ + def __init__(self, topic, tokens): self.topic = topic self.tokens = tokens @@ -31,6 +34,7 @@ class Sentence(object): class LDADoc(object): """The storage structure of LDA model's inference result. """ + def __init__(self): self._num_topics = None # Number of topics. self._num_accum = None # Number of accumulated sample rounds. @@ -116,8 +120,8 @@ class LDADoc(object): dense_dist = np.zeros(self._num_topics) if self.size() == 0: return dense_dist - dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (self.size() + - self._alpha * self._num_topics) + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) return dense_dist def accumulate_topic_num(self): @@ -129,6 +133,7 @@ class SLDADoc(LDADoc): """Sentence LDA Document, inherited from LDADoc. Add add_sentence interface. """ + def __init__(self): super().__init__() self.__sentences = None diff --git a/modules/text/language_model/slda_news/model.py b/modules/text/language_model/slda_news/model.py index 23f030ea..f63ca92e 100644 --- a/modules/text/language_model/slda_news/model.py +++ b/modules/text/language_model/slda_news/model.py @@ -11,6 +11,7 @@ from slda_news.vocab import Vocab, WordCount class TopicModel(object): """Storage Structure of Topic model, including vocabulary and word topic count. 
""" + def __init__(self, model_dir, config): """ Args: diff --git a/modules/text/language_model/slda_news/tokenizer.py b/modules/text/language_model/slda_news/tokenizer.py index 1d9afabc..585aed88 100644 --- a/modules/text/language_model/slda_news/tokenizer.py +++ b/modules/text/language_model/slda_news/tokenizer.py @@ -7,6 +7,7 @@ from paddlehub.common.logger import logger class Tokenizer(object): """Base tokenizer class. """ + def __init__(self): pass @@ -20,6 +21,7 @@ class SimpleTokenizer(Tokenizer): Notes: This tokenizer can only recognize the words in the corresponding vocab file. """ + def __init__(self, vocab_path): super().__init__() self.__max_word_len = 0 diff --git a/modules/text/language_model/slda_news/util.py b/modules/text/language_model/slda_news/util.py index 8a241056..b1f01135 100644 --- a/modules/text/language_model/slda_news/util.py +++ b/modules/text/language_model/slda_news/util.py @@ -46,6 +46,7 @@ def rand_k(k): def timeit(f): """Return time cost of function f. """ + def timed(*args, **kwargs): start_time = time.time() result = f(*args, **kwargs) diff --git a/modules/text/language_model/slda_news/vose_alias.py b/modules/text/language_model/slda_news/vose_alias.py index 702dfa22..4eae586d 100644 --- a/modules/text/language_model/slda_news/vose_alias.py +++ b/modules/text/language_model/slda_news/vose_alias.py @@ -9,6 +9,7 @@ from slda_news.util import rand, rand_k class VoseAlias(object): """Vose's Alias Method. """ + def __init__(self): self.__alias = None self.__prob = None # np.array diff --git a/modules/text/language_model/slda_novel/document.py b/modules/text/language_model/slda_novel/document.py index 98eae505..4476230a 100644 --- a/modules/text/language_model/slda_novel/document.py +++ b/modules/text/language_model/slda_novel/document.py @@ -5,6 +5,7 @@ class Topic(object): """Basic data structure of topic, contains topic id and corresponding probability. """ + def __init__(self, tid, prob): self.tid = tid # topic id self.prob = prob # topic probability @@ -14,6 +15,7 @@ class Token(object): """Basic storage unit of LDA documents, contains word id and corresponding topic. """ + def __init__(self, topic, id): self.topic = topic self.id = id @@ -23,6 +25,7 @@ class Sentence(object): """Basic storage unit of SentenceLDA documents, contains word ids of the sentence and its corresponding topic id. """ + def __init__(self, topic, tokens): self.topic = topic self.tokens = tokens @@ -31,6 +34,7 @@ class Sentence(object): class LDADoc(object): """The storage structure of LDA model's inference result. """ + def __init__(self): self._num_topics = None # Number of topics. self._num_accum = None # Number of accumulated sample rounds. @@ -116,8 +120,8 @@ class LDADoc(object): dense_dist = np.zeros(self._num_topics) if self.size() == 0: return dense_dist - dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (self.size() + - self._alpha * self._num_topics) + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) return dense_dist def accumulate_topic_num(self): @@ -129,6 +133,7 @@ class SLDADoc(LDADoc): """Sentence LDA Document, inherited from LDADoc. Add add_sentence interface. 
""" + def __init__(self): super().__init__() self.__sentences = None diff --git a/modules/text/language_model/slda_novel/model.py b/modules/text/language_model/slda_novel/model.py index 05dac700..cd4e6bab 100644 --- a/modules/text/language_model/slda_novel/model.py +++ b/modules/text/language_model/slda_novel/model.py @@ -11,6 +11,7 @@ from slda_novel.vocab import Vocab, WordCount class TopicModel(object): """Storage Structure of Topic model, including vocabulary and word topic count. """ + def __init__(self, model_dir, config): """ Args: diff --git a/modules/text/language_model/slda_novel/tokenizer.py b/modules/text/language_model/slda_novel/tokenizer.py index 1d9afabc..585aed88 100644 --- a/modules/text/language_model/slda_novel/tokenizer.py +++ b/modules/text/language_model/slda_novel/tokenizer.py @@ -7,6 +7,7 @@ from paddlehub.common.logger import logger class Tokenizer(object): """Base tokenizer class. """ + def __init__(self): pass @@ -20,6 +21,7 @@ class SimpleTokenizer(Tokenizer): Notes: This tokenizer can only recognize the words in the corresponding vocab file. """ + def __init__(self, vocab_path): super().__init__() self.__max_word_len = 0 diff --git a/modules/text/language_model/slda_novel/util.py b/modules/text/language_model/slda_novel/util.py index 6b24c714..b92e183a 100644 --- a/modules/text/language_model/slda_novel/util.py +++ b/modules/text/language_model/slda_novel/util.py @@ -46,6 +46,7 @@ def rand_k(k): def timeit(f): """Return time cost of function f. """ + def timed(*args, **kwargs): start_time = time.time() result = f(*args, **kwargs) diff --git a/modules/text/language_model/slda_novel/vose_alias.py b/modules/text/language_model/slda_novel/vose_alias.py index a3ddba61..1f424a04 100644 --- a/modules/text/language_model/slda_novel/vose_alias.py +++ b/modules/text/language_model/slda_novel/vose_alias.py @@ -9,6 +9,7 @@ from slda_novel.util import rand, rand_k class VoseAlias(object): """Vose's Alias Method. """ + def __init__(self): self.__alias = None self.__prob = None # np.array diff --git a/modules/text/language_model/slda_webpage/document.py b/modules/text/language_model/slda_webpage/document.py index 98eae505..4476230a 100644 --- a/modules/text/language_model/slda_webpage/document.py +++ b/modules/text/language_model/slda_webpage/document.py @@ -5,6 +5,7 @@ class Topic(object): """Basic data structure of topic, contains topic id and corresponding probability. """ + def __init__(self, tid, prob): self.tid = tid # topic id self.prob = prob # topic probability @@ -14,6 +15,7 @@ class Token(object): """Basic storage unit of LDA documents, contains word id and corresponding topic. """ + def __init__(self, topic, id): self.topic = topic self.id = id @@ -23,6 +25,7 @@ class Sentence(object): """Basic storage unit of SentenceLDA documents, contains word ids of the sentence and its corresponding topic id. """ + def __init__(self, topic, tokens): self.topic = topic self.tokens = tokens @@ -31,6 +34,7 @@ class Sentence(object): class LDADoc(object): """The storage structure of LDA model's inference result. """ + def __init__(self): self._num_topics = None # Number of topics. self._num_accum = None # Number of accumulated sample rounds. 
@@ -116,8 +120,8 @@ class LDADoc(object): dense_dist = np.zeros(self._num_topics) if self.size() == 0: return dense_dist - dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (self.size() + - self._alpha * self._num_topics) + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) return dense_dist def accumulate_topic_num(self): @@ -129,6 +133,7 @@ class SLDADoc(LDADoc): """Sentence LDA Document, inherited from LDADoc. Add add_sentence interface. """ + def __init__(self): super().__init__() self.__sentences = None diff --git a/modules/text/language_model/slda_webpage/model.py b/modules/text/language_model/slda_webpage/model.py index 0b332ccb..e3e78020 100644 --- a/modules/text/language_model/slda_webpage/model.py +++ b/modules/text/language_model/slda_webpage/model.py @@ -11,6 +11,7 @@ from slda_webpage.vocab import Vocab, WordCount class TopicModel(object): """Storage Structure of Topic model, including vocabulary and word topic count. """ + def __init__(self, model_dir, config): """ Args: diff --git a/modules/text/language_model/slda_webpage/tokenizer.py b/modules/text/language_model/slda_webpage/tokenizer.py index 1d9afabc..585aed88 100644 --- a/modules/text/language_model/slda_webpage/tokenizer.py +++ b/modules/text/language_model/slda_webpage/tokenizer.py @@ -7,6 +7,7 @@ from paddlehub.common.logger import logger class Tokenizer(object): """Base tokenizer class. """ + def __init__(self): pass @@ -20,6 +21,7 @@ class SimpleTokenizer(Tokenizer): Notes: This tokenizer can only recognize the words in the corresponding vocab file. """ + def __init__(self, vocab_path): super().__init__() self.__max_word_len = 0 diff --git a/modules/text/language_model/slda_webpage/util.py b/modules/text/language_model/slda_webpage/util.py index e3181ead..6323a820 100644 --- a/modules/text/language_model/slda_webpage/util.py +++ b/modules/text/language_model/slda_webpage/util.py @@ -46,6 +46,7 @@ def rand_k(k): def timeit(f): """Return time cost of function f. """ + def timed(*args, **kwargs): start_time = time.time() result = f(*args, **kwargs) diff --git a/modules/text/language_model/slda_webpage/vose_alias.py b/modules/text/language_model/slda_webpage/vose_alias.py index bc08b165..1190c84d 100644 --- a/modules/text/language_model/slda_webpage/vose_alias.py +++ b/modules/text/language_model/slda_webpage/vose_alias.py @@ -9,6 +9,7 @@ from slda_webpage.util import rand, rand_k class VoseAlias(object): """Vose's Alias Method. """ + def __init__(self): self.__alias = None self.__prob = None # np.array diff --git a/modules/text/language_model/slda_weibo/document.py b/modules/text/language_model/slda_weibo/document.py index 98eae505..4476230a 100644 --- a/modules/text/language_model/slda_weibo/document.py +++ b/modules/text/language_model/slda_weibo/document.py @@ -5,6 +5,7 @@ class Topic(object): """Basic data structure of topic, contains topic id and corresponding probability. """ + def __init__(self, tid, prob): self.tid = tid # topic id self.prob = prob # topic probability @@ -14,6 +15,7 @@ class Token(object): """Basic storage unit of LDA documents, contains word id and corresponding topic. """ + def __init__(self, topic, id): self.topic = topic self.id = id @@ -23,6 +25,7 @@ class Sentence(object): """Basic storage unit of SentenceLDA documents, contains word ids of the sentence and its corresponding topic id. 
""" + def __init__(self, topic, tokens): self.topic = topic self.tokens = tokens @@ -31,6 +34,7 @@ class Sentence(object): class LDADoc(object): """The storage structure of LDA model's inference result. """ + def __init__(self): self._num_topics = None # Number of topics. self._num_accum = None # Number of accumulated sample rounds. @@ -116,8 +120,8 @@ class LDADoc(object): dense_dist = np.zeros(self._num_topics) if self.size() == 0: return dense_dist - dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (self.size() + - self._alpha * self._num_topics) + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) return dense_dist def accumulate_topic_num(self): @@ -129,6 +133,7 @@ class SLDADoc(LDADoc): """Sentence LDA Document, inherited from LDADoc. Add add_sentence interface. """ + def __init__(self): super().__init__() self.__sentences = None diff --git a/modules/text/language_model/slda_weibo/model.py b/modules/text/language_model/slda_weibo/model.py index 645bd184..500f44b5 100644 --- a/modules/text/language_model/slda_weibo/model.py +++ b/modules/text/language_model/slda_weibo/model.py @@ -11,6 +11,7 @@ from slda_weibo.vocab import Vocab, WordCount class TopicModel(object): """Storage Structure of Topic model, including vocabulary and word topic count. """ + def __init__(self, model_dir, config): """ Args: diff --git a/modules/text/language_model/slda_weibo/tokenizer.py b/modules/text/language_model/slda_weibo/tokenizer.py index 1d9afabc..585aed88 100644 --- a/modules/text/language_model/slda_weibo/tokenizer.py +++ b/modules/text/language_model/slda_weibo/tokenizer.py @@ -7,6 +7,7 @@ from paddlehub.common.logger import logger class Tokenizer(object): """Base tokenizer class. """ + def __init__(self): pass @@ -20,6 +21,7 @@ class SimpleTokenizer(Tokenizer): Notes: This tokenizer can only recognize the words in the corresponding vocab file. """ + def __init__(self, vocab_path): super().__init__() self.__max_word_len = 0 diff --git a/modules/text/language_model/slda_weibo/util.py b/modules/text/language_model/slda_weibo/util.py index 04b2fc99..9c2a651e 100644 --- a/modules/text/language_model/slda_weibo/util.py +++ b/modules/text/language_model/slda_weibo/util.py @@ -46,6 +46,7 @@ def rand_k(k): def timeit(f): """Return time cost of function f. """ + def timed(*args, **kwargs): start_time = time.time() result = f(*args, **kwargs) diff --git a/modules/text/language_model/slda_weibo/vose_alias.py b/modules/text/language_model/slda_weibo/vose_alias.py index c8c13237..268f307a 100644 --- a/modules/text/language_model/slda_weibo/vose_alias.py +++ b/modules/text/language_model/slda_weibo/vose_alias.py @@ -9,6 +9,7 @@ from slda_weibo.util import rand, rand_k class VoseAlias(object): """Vose's Alias Method. """ + def __init__(self): self.__alias = None self.__prob = None # np.array diff --git a/modules/text/text_generation/ernie_gen/README.md b/modules/text/text_generation/ernie_gen/README.md new file mode 100644 index 00000000..fc3a08d3 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/README.md @@ -0,0 +1,190 @@ +## 概述 + +ERNIE-GEN 是面向生成任务的预训练-微调框架,首次在预训练阶段加入span-by-span 生成任务,让模型每次能够生成一个语义完整的片段。在预训练和微调中通过填充式生成机制和噪声感知机制来缓解曝光偏差问题。此外, ERNIE-GEN 采样多片段-多粒度目标文本采样策略, 增强源文本和目标文本的关联性,加强了编码器和解码器的交互。ernie_gen module是一个具备微调功能的module,可以快速完成特定场景module的制作。 +

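+The module itself can be fetched ahead of time with the standard PaddleHub install command shown below (assuming PaddleHub is already installed; `hub.Module(name="ernie_gen")` in the examples further down will also download it automatically on first use):
+
+```shell
+$ hub install ernie_gen
+```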
+
+For more details, please refer to the paper [ERNIE-GEN: An Enhanced Multi-Flow Pre-training and Fine-tuning Framework for Natural Language Generation](https://arxiv.org/abs/2001.11314).
+
+## API
+
+```python
+def finetune(
+    train_path,
+    dev_path=None,
+    save_dir="ernie_gen_result",
+    init_ckpt_path=None,
+    use_gpu=True,
+    max_steps=500,
+    batch_size=8,
+    max_encode_len=50,
+    max_decode_len=50,
+    learning_rate=5e-5,
+    warmup_proportion=0.1,
+    weight_decay=0.1,
+    noise_prob=0,
+    label_smooth=0,
+    beam_width=5,
+    length_penalty=1.0,
+    log_interval=100,
+    save_interval=200,
+):
+```
+
+Fine-tuning API.
+
+**Parameters**
+
+* train_path(str): Path of the training set. Each line should be formatted as "id\tinput text\tlabel", for example: "1\t床前明月光\t疑是地上霜"
+* dev_path(str): Path of the validation set. Each line should be formatted as "id\tinput text\tlabel", for example: "1\t举头望明月\t低头思故乡"
+* save_dir(str): Directory in which the model and the prediction output on the validation set are saved.
+* init_ckpt_path(str): Path of the checkpoint used to initialize the model; enables incremental training.
+* use_gpu(bool): Whether to use GPU.
+* max_steps(int): Maximum number of training steps.
+* batch_size(int): Batch size used during training.
+* max_encode_len(int): Maximum encoding length.
+* max_decode_len(int): Maximum decoding length.
+* learning_rate(float): Learning rate.
+* warmup_proportion(float): Proportion of training steps used for learning-rate warmup.
+* weight_decay(float): Weight decay.
+* noise_prob(float): Noise probability; see the ERNIE-GEN paper for details.
+* label_smooth(float): Label smoothing weight.
+* beam_width(int): Beam size used when predicting on the validation set.
+* length_penalty(float): Length penalty weight used when predicting on the validation set.
+* log_interval(int): Logging interval (in steps) during training.
+* save_interval(int): Model saving interval (in steps) during training. The validation set is evaluated after each save.
+
+**Returns**
+
+* result(dict): Run result, containing 2 keys:
+```
+    last_save_path(str): Save path of the model when training finished.
+    last_ppl(float): Perplexity of the model when training finished.
+```
+
+```python
+def export(
+    params_path,
+    module_name,
+    author,
+    max_encode_len=50,
+    max_decode_len=50,
+    version="1.0.0",
+    summary="",
+    author_email="",
+    export_path=".")
+```
+
+Module export API, which packages the trained parameters into a hub module in one click.
+
+**Parameters**
+
+* params_path(str): Path of the model parameters.
+* module_name(str): Name of the module, for example "ernie_gen_couplet".
+* author(str): Author name.
+* max_encode_len(int): Maximum encoding length.
+* max_decode_len(int): Maximum decoding length.
+* version(str): Version number.
+* summary(str): Short English description of the module.
+* author_email(str): Email address of the author.
+* export_path(str): Export path of the module.
+
+**Code example**
+
+```python
+import paddlehub as hub
+
+module = hub.Module(name="ernie_gen")
+
+result = module.finetune(
+    train_path='test_data/train.txt',
+    dev_path='test_data/dev.txt',
+    max_steps=300,
+    batch_size=2
+)
+
+module.export(params_path=result['last_save_path'], module_name="ernie_gen_test", author="test")
+```
+
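+For reference, `test_data/train.txt` and `test_data/dev.txt` in the example above are assumed to follow the tab-separated format described for `train_path` and `dev_path`; an illustrative file built from the sample lines given there (\t denotes a tab character) would be:
+
+```
+1\t床前明月光\t疑是地上霜
+2\t举头望明月\t低头思故乡
+```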
+
+## How to use
+
+After the module is exported, install it with `hub install $module_name`; the self-made module can then be called in the following three ways:
+
+1. Command line prediction
+
+```shell
+$ hub run $module_name --input_text="input text" --use_gpu True --beam_width 5
+```
+
+2. Python API prediction
+```python
+import paddlehub as hub
+
+module = hub.Module(name="$module_name")
+
+test_texts = ["input text 1", "input text 2"]
+# generate takes 3 arguments: texts is the list of input texts, use_gpu specifies whether to use the GPU, and beam_width sets the beam search width.
+results = module.generate(texts=test_texts, use_gpu=True, beam_width=5)
+for result in results:
+    print(result)
+```
+
+3. Serving deployment
+
+Start the model service on the server side:
+
+```shell
+$ hub serving start -m $module_name -p 8866
+```
+
+**NOTE:** To predict with GPU, set the CUDA\_VISIBLE\_DEVICES environment variable before starting the service; otherwise there is no need to set it.
+
+On the client side, the following few lines of code are enough to send a prediction request and obtain the result:
+
+```python
+import requests
+import json
+
+# send the HTTP request
+
+data = {'texts':["input text 1", "input text 2"],
+        'use_gpu':True, 'beam_width':5}
+headers = {"Content-type": "application/json"}
+url = "http://127.0.0.1:8866/predict/$module_name"
+r = requests.post(url=url, headers=headers, data=json.dumps(data))
+
+# fetch the results
+results = r.json()["results"]
+for result in results:
+    print(result)
+```
+
+**NOTE**: `$module_name` above is the module_name specified in export.
+
+You can also package the $module_name directory into a tar.gz archive and contact the PaddleHub team to upload it to the PaddleHub module repository, so that more users can use your model through one-click installation. PaddleHub warmly welcomes your contributions to help the open-source community grow.
+
+## Source code
+
+https://github.com/PaddlePaddle/ERNIE/blob/repro/ernie-gen/
+
+### Dependencies
+
+paddlepaddle >= 1.8.2
+
+paddlehub >= 1.7.0
+
+
+## Release history
+
+* 1.0.0
+
+  First release
+
+* 1.0.1
+
+  Fixed a bug in module export
+
+* 1.0.2
+
+  Fixed a bug when running on Windows
diff --git a/modules/text/text_generation/ernie_gen/__init__.py b/modules/text/text_generation/ernie_gen/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modules/text/text_generation/ernie_gen/decode.py b/modules/text/text_generation/ernie_gen/decode.py
new file mode 100644
index 00000000..a9dd8609
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen/decode.py
@@ -0,0 +1,258 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+from collections import namedtuple
+
+import paddle.fluid as F
+import paddle.fluid.layers as L
+import paddle.fluid.dygraph as D
+import numpy as np
+from paddlehub.common.logger import logger
+
+
+def gen_bias(encoder_inputs, decoder_inputs, step):
+    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
+    attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
+    decoder_bias = L.cast((L.matmul(attn_bias, 1.
/ attn_bias, transpose_y=True) >= 1.), + 'float32') # [1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) # [bsz, 1, encoderlen] + encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) # [bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) # [bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + logger.debug(seqlen.numpy()) + logger.debug(d_seqlen) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + logger.debug('decode step %d' % step) + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached + cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. - is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. 
+ L.cast(length, 'float32')) / 6., length_penalty) + return log_probs / lp + + +def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty): + """logits.shape == [B*W, V]""" + _, vocab_size = logits.shape + + bsz, beam_width = state.log_probs.shape + onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') # [1, V] + + probs = L.log(L.softmax(logits)) # [B*W, V] + probs = mask_prob(probs, onehot_eos, state.finished) # [B*W, V] + allprobs = L.reshape(state.log_probs, [-1, 1]) + probs # [B*W, V] + + not_finished = 1 - L.reshape(state.finished, [-1, 1]) # [B*W,1] + not_eos = 1 - onehot_eos + length_to_add = not_finished * not_eos # [B*W,V] + alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add + + allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) + alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) + allscore = hyp_score(allprobs, alllen, length_penalty) + if is_first_step: + allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 + scores, idx = L.topk(allscore, k=beam_width) # [B, W] + next_beam_id = idx // vocab_size # [B, W] + next_word_id = idx % vocab_size + + gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1) + next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) + next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) + + gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1) + next_finished = L.reshape(L.gather_nd(state.finished, gather_idx), + state.finished.shape) # [gather new beam state according to new beam id] + + next_finished += L.cast(next_word_id == eos_id, 'int64') + next_finished = L.cast(next_finished > 0, 'int64') + + next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished) + output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) + + return output, next_state + + +@D.no_grad +def beam_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + beam_width=5, + tgt_type_id=3, + length_penalty=1.0): + model.eval() + _, __, info = model(q_ids, q_sids) + d_batch, d_seqlen = q_ids.shape + + state = BeamSearchState( + log_probs=L.zeros([d_batch, beam_width], 'float32'), + lengths=L.zeros([d_batch, beam_width], 'int64'), + finished=L.zeros([d_batch, beam_width], 'int64')) + outputs = [] + + def reorder_(t, parent_id): + """reorder cache according to parent beam id""" + gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1]) + t = L.gather(t, gather_idx) + return t + + def tile_(t, times): + _shapes = list(t.shape[1:]) + ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [ + 1, + times, + ] + [ + 1, + ] * len(_shapes)), [ + -1, + ] + _shapes) + return ret + + cached_k, cached_v = info['caches'] + cached_k = [tile_(k, beam_width) for k in cached_k] + cached_v = [tile_(v, beam_width) for v in cached_v] + past_cache = (cached_k, cached_v) + + q_ids = tile_(q_ids, beam_width) + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + + cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id + attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) + 
pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + + output, state = beam_search_step( + state, + logits[:, 1], + eos_id=eos_id, + beam_width=beam_width, + is_first_step=(step == 0), + length_penalty=length_penalty) + outputs.append(output) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [ + reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k) + ] # concat cached + cached_v = [ + reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v) + ] + past_cache = (cached_k, cached_v) + + pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width]) + ids = L.stack([pred_ids_flatten, attn_ids], 1) + + if state.finished.numpy().all(): + break + + final_ids = L.stack([o.predicted_ids for o in outputs], 0) + final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) + final_ids = L.gather_tree(final_ids, final_parent_ids)[:, :, 0] # pick best beam + final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1]), [1, 0]) + return final_ids + + +en_patten = re.compile(r'^[a-zA-Z0-9]*$') + + +def post_process(token): + if token.startswith('##'): + ret = token[2:] + else: + if en_patten.match(token): + ret = ' ' + token + else: + ret = token + return ret diff --git a/modules/text/text_generation/ernie_gen/module.py b/modules/text/text_generation/ernie_gen/module.py new file mode 100644 index 00000000..3a6ee5b7 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/module.py @@ -0,0 +1,437 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import shutil +from copy import deepcopy + +import numpy as np +import paddle.fluid as F +import paddle.fluid.layers as L +import paddle.fluid.dygraph as D +try: + from ernie.modeling_ernie import ErnieModelForGeneration + from ernie.tokenizing_ernie import ErnieTokenizer + from ernie.optimization import AdamW, LinearDecay +except: + raise ImportError( + "The module requires additional dependencies: ernie. 
You can install ernie via 'pip install paddle-ernie'") +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo + +from .decode import beam_search_infilling, post_process +import ernie_gen.propeller.paddle as propeller + + +@moduleinfo( + name="ernie_gen", + version="1.0.2", + summary="ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning.", + author="baidu", + author_email="", + type="nlp/text_generation", +) +class ErnieGen(hub.Module): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.tokenizer = ErnieTokenizer.from_pretrained("ernie-1.0", mask_token=None) + self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} + self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) + self._model = None + + @property + def model(self): + if not self._model: + self._model = ErnieModelForGeneration.from_pretrained("ernie-1.0") + return self._model + + def finetune( + self, + train_path, + dev_path=None, + save_dir="ernie_gen_result", + init_ckpt_path=None, + use_gpu=True, + max_steps=500, + batch_size=8, + max_encode_len=50, + max_decode_len=50, + learning_rate=5e-5, + warmup_proportion=0.1, + weight_decay=0.1, + noise_prob=0, + label_smooth=0, + beam_width=5, + length_penalty=1.0, + log_interval=100, + save_interval=200, + ): + """ + finetune with the specified dataset. + + Args: + train_path(str): the train dataset path. + dev_path(str): the dev dataset path. + save_dir(str): the model params and dev dataset predict result save path. + init_ckpt_path(str): incremental training load path. + use_gpu(bool): use gpu or not. + max_steps(int): max training steps. + batch_size(int): the batch size. + max_encode_len(int): the max encode length. + max_decode_len(int): the max decode length. + learning_rate(float): the learning rate. + warmup_proportion(float): the warmup proportion. + weight_decay(float): the weight decay magnitude. + noise_prob(float): the nosie probability. see the ernie gen paper for details. + label_smooth(float): the label smooth magnitude. + beam_width(int): the beam size during evaluating the dev dataset. + length_penalty(float): the length penalty during evaluating the dev dataset. + log_interval(int): the log interval. + save_interval(int): the save interval. dev set will be evaluated after saving. + + Return: + result(dict): A Dictionary of shape:: + { + last_save_path(str): last model save path. + last_ppl(float): last model ppl. 
+ } + """ + self.max_encode_len = max_encode_len + self.max_decode_len = max_decode_len + self.noise_prob = noise_prob + + place = F.CUDAPlace(0) if use_gpu else F.CPUPlace() + + with F.dygraph.guard(place): + if init_ckpt_path is not None: + logger.info('loading checkpoint from %s' % init_ckpt_path) + sd, _ = D.load_dygraph(init_ckpt_path) + self.model.set_dict(sd) + + feature_column = propeller.data.FeatureColumns([ + propeller.data.LabelColumn('id'), + propeller.data.TextColumn( + 'src', + unk_id=self.tokenizer.unk_id, + vocab_dict=self.tokenizer.vocab, + tokenizer=self.tokenizer.tokenize), + propeller.data.TextColumn( + 'tgt', + unk_id=self.tokenizer.unk_id, + vocab_dict=self.tokenizer.vocab, + tokenizer=self.tokenizer.tokenize), + ]) + + train_ds = feature_column.build_dataset('train', data_file=train_path, shuffle=False, + repeat=True, use_gz=False)\ + .map(self._map_fn).shuffle(10000).padded_batch(batch_size).map(self._after_padding) + train_ds.data_shapes = [[None, None]] * 7 + [[None, None, None]] * 3 + [[None]] + train_ds.data_types = ['int64'] * 11 + + if dev_path: + dev_ds = feature_column.build_dataset('dev', data_file=dev_path, shuffle=False, + repeat=False, use_gz=False) \ + .map(self._map_fn) \ + .padded_batch(1) \ + .map(self._after_padding) + dev_ds.data_shapes = [[None, None]] * 7 + [[None, None, None]] * 3 + [[None]] + dev_ds.data_types = ['int64'] * 11 + + vocab_size, _ = self.model.word_emb.weight.shape + g_clip = F.clip.GradientClipByGlobalNorm(1.0) + opt = AdamW( + learning_rate=LinearDecay(learning_rate, int(warmup_proportion * max_steps), max_steps), + parameter_list=self.model.parameters(), + weight_decay=weight_decay, + grad_clip=g_clip) + + loss = None + + save_path = None + ppl = None + + if save_dir and not os.path.exists(save_dir): + os.makedirs(save_dir) + for step, data in enumerate(train_ds.start(place)): + (example_id, src_ids, src_sids, src_pids, tgt_ids, tgt_sids, tgt_pids, attn_ids, mask_src_2_src, + mask_tgt_2_srctgt, mask_attn_2_srctgtattn, tgt_labels) = data + + _, __, info = self.model( + src_ids, sent_ids=src_sids, pos_ids=src_pids, attn_bias=mask_src_2_src, encode_only=True) + cached_k, cached_v = info['caches'] + _, __, info = self.model( + tgt_ids, + sent_ids=tgt_sids, + pos_ids=tgt_pids, + attn_bias=mask_tgt_2_srctgt, + past_cache=(cached_k, cached_v), + encode_only=True) + cached_k2, cached_v2 = info['caches'] + past_cache_k = [L.concat([k, k2], 1) for k, k2 in zip(cached_k, cached_k2)] + past_cache_v = [L.concat([v, v2], 1) for v, v2 in zip(cached_v, cached_v2)] + if label_smooth > 0.: + tgt_labels = L.label_smooth(F.one_hot(tgt_labels, vocab_size), epsilon=label_smooth) + loss, _, __ = self.model( + attn_ids, + sent_ids=tgt_sids, + pos_ids=tgt_pids, + attn_bias=mask_attn_2_srctgtattn, + past_cache=(past_cache_k, past_cache_v), + tgt_labels=tgt_labels, + tgt_pos=L.where(attn_ids == self.tokenizer.vocab['[MASK]'])) + + loss.backward() + opt.minimize(loss) + self.model.clear_gradients() + + if step % log_interval == 0: + loss_np = loss.numpy() + ppl = np.exp(loss_np) + logger.info('[step %d / %d]train loss %.5f, ppl %.5f, elr %.3e' % (step, max_steps, loss_np, ppl, + opt.current_step_lr())) + if save_dir and step % save_interval == 0 and step > 0: + loss_np = loss.numpy() + ppl = np.exp(loss_np) + save_name = "step_%s_ppl_%.5f" % (step, ppl) + save_path = os.path.join(save_dir, save_name) + logger.info("save the model in %s" % save_path) + F.save_dygraph(self.model.state_dict(), save_path) + + if dev_path: + logger.info('evaluating...') + res 
= self._evaluate(dev_ds, place, beam_width, length_penalty) + output_path = os.path.join(save_dir, "step_%s_ppl_%.5f.txt" % (step, ppl)) + logger.info('save the predict result in %s' % output_path) + with open(output_path, 'w') as fout: + fout.write(('\n'.join(res))) + + if step > max_steps: + break + + if loss: + loss_np = loss.numpy() + ppl = np.exp(loss_np) + logger.info( + '[final step %d]train loss %.5f, ppl %.5f, elr %.3e' % (step, loss_np, ppl, opt.current_step_lr())) + if save_dir: + save_name = "step_%s_ppl_%.5f" % (step, ppl) + save_path = os.path.join(save_dir, save_name) + logger.info("save the model in %s" % save_path) + F.save_dygraph(self.model.state_dict(), save_path) + + if dev_path: + logger.info('evaluating...') + res = self._evaluate(dev_ds, place, beam_width, length_penalty) + output_path = os.path.join(save_dir, "step_%s_ppl_%.5f.txt" % (step, ppl)) + logger.info('save the predict result in %s' % output_path) + with open(output_path, 'w') as fout: + fout.write(('\n'.join(res))) + + result = { + "last_save_path": "%s.pdparams" % save_path, + "last_ppl": ppl[0], + } + + return result + + def export(self, + params_path, + module_name, + author, + max_encode_len=50, + max_decode_len=50, + version="1.0.0", + summary="", + author_email="", + export_path="."): + """ + export the model saved in the params_path to a hub module. + + Args: + params_path(str): the model params save path. + module_name(str): the module name. + author(str): the author name. + max_encode_len(int): the max encode length. + max_decode_len(int): the max decode length. + version(str): the version information. + summary(str): the module brief introduction. + author_email(str): the author email address. + export_path(str): the module export path. + """ + if not os.path.exists(params_path): + raise FileNotFoundError("The path %s does not exist." % params_path) + export_module_path = os.path.join(export_path, module_name) + if not os.path.exists(export_module_path): + os.makedirs(export_module_path) + logger.info("Begin export the model save in %s ..." 
% params_path) + + assets_path = os.path.join(self.directory, "template", "assets") + model_path = os.path.join(self.directory, "template", "model") + init_path = os.path.join(self.directory, "template", "__init__.py") + module_temp_path = os.path.join(self.directory, "template", "module.temp") + + export_assets_path = os.path.join(export_module_path, "assets") + export_params_path = os.path.join(export_module_path, "assets", "ernie_gen.pdparams") + export_init_path = os.path.join(export_module_path, "__init__.py") + export_model_path = os.path.join(export_module_path, "model") + + shutil.copyfile(init_path, export_init_path) + shutil.copytree(assets_path, export_assets_path) + shutil.copyfile(params_path, export_params_path) + shutil.copytree(model_path, export_model_path) + + module_path = os.path.join(export_module_path, "module.py") + with open(module_temp_path, encoding="utf8") as ftemp, open(module_path, "w") as fmodule: + content = ftemp.read().replace(r"{module_name}", module_name).replace(r"{author}", author).replace( + r"{version}", version).replace(r"{summary}", summary).replace(r"{author_email}", author_email).replace( + r"{max_encode_len}", str(max_encode_len)).replace(r"{max_decode_len}", str(max_decode_len)) + fmodule.write(content) + + logger.info("The module has exported to %s" % os.path.abspath(export_module_path)) + + def _evaluate(self, datasets, place, beam_width, length_penalty): + self.model.eval() + printables = [] + for step, data in enumerate(datasets.start(place)): + (example_id, src_ids, src_sids, src_pids, _, _, _, _, _, _, _, _) = data # never use target when infer + output_ids = beam_search_infilling( + self.model, + src_ids, + src_sids, + eos_id=self.tokenizer.sep_id, + sos_id=self.tokenizer.cls_id, + attn_id=self.tokenizer.vocab["[MASK]"], + max_decode_len=self.max_decode_len, + max_encode_len=self.max_encode_len, + beam_width=beam_width, + length_penalty=length_penalty, + tgt_type_id=1, + ) + output_str = self.rev_lookup(output_ids.numpy()) + for eid, ostr in zip(example_id.numpy().tolist(), output_str.tolist()): + if '[SEP]' in ostr: + ostr = ostr[:ostr.index('[SEP]')] + ostr = ''.join(map(post_process, ostr)) + printables.append('%d\t%s' % (eid, ostr)) + self.model.train() + return printables + + def _map_fn(self, example_id, src_ids, tgt_ids): + src_ids = src_ids[:self.max_encode_len] + tgt_ids = tgt_ids[:self.max_decode_len] + src_ids, src_sids = self.tokenizer.build_for_ernie(src_ids) + src_pids = np.arange(len(src_ids), dtype=np.int64) + + tgt_ids, tgt_sids = self.tokenizer.build_for_ernie(tgt_ids) + tgt_pids = np.arange(len(tgt_ids), dtype=np.int64) + len(src_ids) # continues position + tgt_sids = np.ones_like(tgt_sids) + + attn_ids = np.ones_like(tgt_ids) * self.tokenizer.vocab['[MASK]'] + if self.noise_prob > 0.: + tgt_labels = deepcopy(tgt_ids) + tgt_ids = self._make_some_noise(tgt_ids, self.noise_prob) #corrupted + else: + tgt_labels = tgt_ids + + return (example_id, src_ids, src_pids, src_sids, tgt_ids, tgt_pids, tgt_sids, attn_ids, tgt_labels) + + def _make_some_noise(self, ids, noise_prob): + noise_ids = np.random.randint(1, len(self.tokenizer.vocab), size=ids.shape) + pos, = np.where(np.ones_like(ids)) + np.random.shuffle(pos) + pos = pos[:int(noise_prob * len(pos))] + ids[pos, ] = noise_ids[pos, ] + return ids + + def _after_padding(self, example_id, src_ids, src_pids, src_sids, tgt_ids, tgt_pids, tgt_sids, attn_ids, + tgt_labels): + ''' + attention mask: + *** src, tgt, attn + src 00, 01, 11 + tgt 10, 11, 12 + attn 20, 21, 22 + *** s1, s2 
| t1 t2 t3| attn1 attn2 attn3 + s1 1, 1 | 0, 0, 0,| 0, 0, 0, + s2 1, 1 | 0, 0, 0,| 0, 0, 0, + - + t1 1, 1, | 1, 0, 0,| 0, 0, 0, + t2 1, 1, | 1, 1, 0,| 0, 0, 0, + t3 1, 1, | 1, 1, 1,| 0, 0, 0, + - + attn1 1, 1, | 0, 0, 0,| 1, 0, 0, + attn2 1, 1, | 1, 0, 0,| 0, 1, 0, + attn3 1, 1, | 1, 1, 0,| 0, 0, 1, + for details, see Fig3. https://arxiv.org/abs/2001.11314 + ''' + + src_len = src_ids.shape[1] + tgt_len = tgt_ids.shape[1] + mask_00 = self._gen_mask(src_ids, 'bidi', query_len=src_len) + + mask_10 = self._gen_mask(src_ids, 'bidi', query_len=tgt_len) + mask_11 = self._gen_mask(tgt_ids, 'causal', query_len=tgt_len) + + mask_20 = self._gen_mask(src_ids, 'bidi', query_len=tgt_len) + mask_21 = self._gen_mask(tgt_ids, 'causal_without_diag', query_len=tgt_len) + mask_22 = self._gen_mask(attn_ids, 'diag', query_len=tgt_len) + ''' + mask = np.concatenate([ + np.concatenate([mask_00, mask_01, mask_02], 2), + np.concatenate([mask_10, mask_11, mask_12], 2), + np.concatenate([mask_20, mask_21, mask_22], 2), + ], 1) + ids = np.concatenate([src_ids, tgt_ids, attn_ids], 1) + pids = np.concatenate([src_pids, tgt_pids, tgt_pids], 1) + sids = np.concatenate([src_sids, tgt_sids, tgt_sids], 1) + ''' + + mask_src_2_src = mask_00 + mask_tgt_2_srctgt = np.concatenate([mask_10, mask_11], 2) + mask_attn_2_srctgtattn = np.concatenate([mask_20, mask_21, mask_22], 2) + + tgt_labels = tgt_labels[np.where(tgt_labels != 0)] + return (example_id, src_ids, src_sids, src_pids, tgt_ids, tgt_sids, tgt_pids, attn_ids, mask_src_2_src, + mask_tgt_2_srctgt, mask_attn_2_srctgtattn, tgt_labels) + + def _gen_mask(self, batch_ids, mask_type='bidi', query_len=None, pad_value=0): + if query_len is None: + query_len = batch_ids.shape[1] + if mask_type != 'empty': + mask = (batch_ids != pad_value).astype(np.float32) + mask = np.tile(np.expand_dims(mask, 1), [1, query_len, 1]) + if mask_type == 'causal': + assert query_len == batch_ids.shape[1] + mask = np.tril(mask) + elif mask_type == 'causal_without_diag': + assert query_len == batch_ids.shape[1] + mask = np.tril(mask, -1) + elif mask_type == 'diag': + assert query_len == batch_ids.shape[1] + mask = np.stack([np.diag(np.diag(m)) for m in mask], 0) + else: + mask = np.zeros_like(batch_ids).astype(np.float32) + mask = np.tile(np.expand_dims(mask, 1), [1, query_len, 1]) + return mask + + +if __name__ == "__main__": + module = ErnieGen() + result = module.finetune( + train_path='test_data/train.txt', dev_path='test_data/dev.txt', max_steps=300, batch_size=2) + module.export(params_path=result['last_save_path'], module_name="ernie_gen_test", author="test") diff --git a/modules/text/text_generation/ernie_gen/propeller/__init__.py b/modules/text/text_generation/ernie_gen/propeller/__init__.py new file mode 100644 index 00000000..ffe40876 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/__init__.py @@ -0,0 +1,44 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Propeller""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import os +import sys +import logging +import six +from time import time + +__version__ = '0.2' + +log = logging.getLogger(__name__) +stream_hdl = logging.StreamHandler(stream=sys.stderr) +formatter = logging.Formatter(fmt='[%(levelname)s] %(asctime)s [%(filename)12s:%(lineno)5d]:\t%(message)s') + +try: + from colorlog import ColoredFormatter + fancy_formatter = ColoredFormatter( + fmt='%(log_color)s[%(levelname)s] %(asctime)s [%(filename)12s:%(lineno)5d]:\t%(message)s') + stream_hdl.setFormatter(fancy_formatter) +except ImportError: + stream_hdl.setFormatter(formatter) + +log.setLevel(logging.INFO) +log.addHandler(stream_hdl) +log.propagate = False + +from ernie_gen.propeller.types import * +from ernie_gen.propeller.util import ArgumentParser, parse_hparam, parse_runconfig, parse_file diff --git a/modules/text/text_generation/ernie_gen/propeller/data/__init__.py b/modules/text/text_generation/ernie_gen/propeller/data/__init__.py new file mode 100644 index 00000000..31701fc0 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/data/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +doc +""" diff --git a/modules/text/text_generation/ernie_gen/propeller/data/functional.py b/modules/text/text_generation/ernie_gen/propeller/data/functional.py new file mode 100644 index 00000000..f4ab9644 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/data/functional.py @@ -0,0 +1,467 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
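The `Dataset` class added in this file chains plain generator functions, with each transformation returning a new `Dataset`. A minimal usage sketch under that reading (the import path is assumed from this patch's layout; the values are purely illustrative):

```python
import numpy as np

from ernie_gen.propeller.data.functional import Dataset  # import path assumed from this patch's layout

# Each list element is unpacked into map()'s arguments; returning a 1-tuple
# keeps a single field per example, which padded_batch() pads and stacks.
ds = (Dataset.from_list([[1, 2, 3], [4, 5], [6]])
      .map(lambda *ids: (np.array(ids, dtype='int64'),))
      .padded_batch(2, pad_value=0))   # pad each field to the longest element in the batch

for batch in ds:     # __iter__ simply runs the composed generator
    print(batch[0])  # first batch ~ [[1 2 3], [4 5 0]], second ~ [[6]]
```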
+"""Basic Dataset API""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import sys +import logging +import os +import itertools +import random +import inspect +import multiprocessing +from contextlib import contextmanager +import gzip +import struct +import functools + +import six +from six.moves import zip, map, filter +import numpy as np + +from ernie_gen.propeller.util import map_structure + +log = logging.getLogger(__name__) + +__all__ = ['Dataset'] + + +@contextmanager +def _open_file(filename, format=None): + if format is None: + fd = open(filename, 'rb') + elif format == 'GZIP': + fd = gzip.open(filename, 'rb') + else: + raise ValueError('unkwon file format %s' % format) + yield fd + fd.close() + + +def _open_record(filename): + def _gen(): + with _open_file(filename, format='GZIP') as f: + while True: + data = f.read(struct.calcsize('i')) + if not len(data): + raise StopIteration + l, = struct.unpack('i', data) + data = f.read(l) + yield data + + return _gen + + +def _shuffle_func(dataset, buffer_size): + def _gen(): + buf = [] + iterable = dataset() + try: + while len(buf) < buffer_size: + buf.append(next(iterable)) + while 1: + i = random.randint(0, buffer_size - 1) + n = next(iterable) + yield buf[i] + buf[i] = n + except StopIteration: + if len(buf): + random.shuffle(buf) + for i in buf: + yield i + + return _gen + + +def _interleave_func(iterable, map_fn, cycle_length, block_length): + def _gen(): + ls = itertools.tee(iterable(), cycle_length) + buf = [] + for i, j in enumerate(ls): + j = itertools.islice(j, i, None, cycle_length) + j = map(map_fn, j) + j = (jjj for jj in j for jjj in jj) #flatten + buf.append(j) + + for tup in six.moves.zip_longest(*buf): + for ii in (i for i in tup if i is not None): + yield ii + + return _gen + + +def _repeat_func(dataset, n): + def _gen(): + iterable = dataset() + if n >= 0: + ret = itertools.chain(*itertools.tee(iterable, n)) + else: + ret = itertools.cycle(iterable) + + for i in ret: + yield i + + return _gen + + +def _filter_func(dataset, fn): + def _gen(): + for i in dataset(): + if isinstance(i, tuple) or isinstance(i, list): + if fn(*i) is True: + yield i + else: + if fn(i) is True: + yield i + + return _gen + + +def _map_func(dataset, fn): + def _gen(): + for i in dataset(): + if isinstance(i, tuple) or isinstance(i, list): + yield fn(*i) + else: + yield fn(i) + + return _gen + + +def _shard_func(dataset, num_shards, index): + def _gen(): + iterable = dataset() + ret = itertools.islice(iterable, index, None, num_shards) + for i in ret: + yield i + + return _gen + + +def _take_func(dataset, count): + def _gen(): + iterable = dataset() + ret = itertools.islice(iterable, count) + for i in ret: + yield i + + return _gen + + +def _chain_func(dataset, dataset2): + def _gen(): + iterable = dataset() + iterable2 = dataset2() + ret = itertools.chain(iterable, iterable2) + for i in ret: + yield i + + return _gen + + +def _buffered_func(dataset, size): + """ + Creates a buffered data reader. + + The buffered data reader will read and save data entries into a + buffer. Reading from the buffered data reader will proceed as long + as the buffer is not empty. + + :param reader: the data reader to read from. + :type reader: callable + :param size: max buffer size. + :type size: int + + :returns: the buffered data reader. 
+ """ + + class _EndSignal(object): + pass + + end = _EndSignal() + + def _read_worker(r, q): + for d in r: + q.put(d) + q.put(end) + + def _data_reader(): + r = dataset() + q = multiprocessing.Queue(maxsize=size) + t = multiprocessing.Process( + target=_read_worker, args=( + r, + q, + )) + t.daemon = True + t.start() + e = q.get() + while e != end: + yield e + e = q.get() + + return _data_reader + + +def _batch_func(dataset, batch_size): + def _gen(): + iterable = dataset() + while True: + buf = list(itertools.islice(iterable, batch_size)) + if not len(buf): + raise StopIteration + buf = list(zip(*buf)) # transpose + buf = [np.stack(b) for b in buf] + yield buf + + return _gen + + +def _padded_batch_func(dataset, batch_size, pad_value=0, max_seqlen=None): + if not isinstance(batch_size, int): + raise ValueError('unknown batch_size: %s' % repr(batch_size)) + + def _gen(): + iterable = dataset() + pad_value_t = pad_value + while True: + buf = list(itertools.islice(iterable, batch_size)) + if not len(buf): + raise StopIteration + buf = list(zip(*buf)) # transpose + if type(pad_value_t) not in [list, tuple]: + pad_value_t = [pad_value_t] * len(buf) + padded = [] + assert len(buf) == len(pad_value_t), 'pad_value [%d] != element size[%d]' % (len(pad_value_t), len(buf)) + for e, pv in zip(buf, pad_value_t): + elem = e[0] + if (not np.isscalar(elem)) and elem.shape != (): + max_len = max(map(len, e)) if max_seqlen is None else max_seqlen + + def _fn(i): + if max_len >= len(i): + return np.pad(i, [0, max_len - len(i)], 'constant', constant_values=pv) + else: + return i[:max_len] + + e = map(_fn, e) + padded.append(np.stack(list(e))) + yield padded + + return _gen + + +class Dataset(object): + """Python Wrapper for PyReader""" + + @classmethod + def from_generator_func(cls, _gen, data_shapes=None, data_types=None): + """doc""" + if not inspect.isgeneratorfunction(_gen): + raise ValueError('expect generator function, got %s' % repr(_gen)) + + def _wrapper(): #compat to py3.7 + try: + for item in _gen(): + yield item + except RuntimeError as e: + if str(e) != 'generator raised StopIteration': + raise e + + ret = cls() + ret.generator = _wrapper + ret.data_shapes = data_shapes + ret.data_types = data_types + return ret + + @classmethod + def from_file(cls, filename, format=None): + """doc""" + if os.path.getsize(filename) == 0: + raise RuntimeError('%s is empty' % filename) + + def _gen(): + with _open_file(filename, format) as f: + for line in f: + yield line + + ret = cls() + ret.generator = _gen + ret.data_shapes = [] + ret.data_types = str + return ret + + @classmethod + def from_record_file(cls, filename): + """doc""" + if os.path.getsize(filename) == 0: + raise RuntimeError('%s is empty' % filename) + _gen = _open_record(filename) + ret = cls() + ret.generator = _gen + ret.data_shapes = [] + ret.data_types = str + return ret + + @classmethod + def from_list(cls, ls): + """doc""" + if not isinstance(ls, list): + raise ValueError('expect list, got %s' % repr(ls)) + + def _gen(): + for i in ls: + yield i + + ret = cls() + ret.generator = _gen + ret.data_shapes = [] + ret.data_types = str + return ret + + def __init__(self): + self.name = None + self._data_shapes = None + self._data_types = None + self.generator = None + self.pyreader = None + + def __repr__(self): + return 'Dataset: name: %s, data_shapes %s, data_types %s' % (self.name, self._data_shapes, self._data_types) + + def __eq__(self, other): + return self.name == other.name and \ + self._data_shapes == other._data_shapes and \ + 
self._data_types == other._data_types + + def __iter__(self): + return self.generator() + + #def __call__(self): + # return self.generator() + + def _infer_shapes_and_types(self): + if self.generator is not None and self.name is not None: + log.info('Try to infer data shapes & types from generator') + first_value = next(self.generator()) + shapes, types = [], [] + for v in first_value: + if not isinstance(v, np.ndarray): + raise ValueError('dataset generator should use numpy elements, got %s' % first_value) + shapes.append(v.shape) + types.append(v.dtype.name) + self._data_shapes = shapes + self._data_types = types + log.info('Dataset `%s` has data_shapes: %s data_types: %s' % (self.name, repr(shapes), repr(types))) + else: + raise ValueError('Try to infer data shapes or types from incomplete Dataset') + + @property + def data_shapes(self): + """doc""" + if self._data_shapes is None: + self._infer_shapes_and_types() + return self._data_shapes + else: + return self._data_shapes + + @data_shapes.setter + def data_shapes(self, val): + """doc""" + self._data_shapes = val + + @property + def data_types(self): + """doc""" + if self._data_types is None: + self._infer_shapes_and_types() + return self._data_types + else: + return self._data_types + + @data_types.setter + def data_types(self, val): + """doc""" + self._data_types = val + + def apply(self, transform_func): + """apply transform func to datasets""" + #input_shapes = transform_func.input_shapes + #input_types = transform_func.input_types + #data_shapes = transform_func.data_shapes + #data_types = transform_func.data_types + #assert input_shapes == self._data_shapes + #assert input_types = self._data_types + ret_gen = transform_func(self.generator) + ret = type(self).from_generator_func(ret_gen) + if self.name is not None: + ret.name = self.name + #ret.data_shapes = data_shapes + #ret.data_types = data_types + return ret + + def shuffle(self, buffer_size): + """doc""" + func = functools.partial(_shuffle_func, buffer_size=buffer_size) + return self.apply(func) + + def repeat(self, n=-1): + """doc""" + func = functools.partial(_repeat_func, n=n) + return self.apply(func) + + def map(self, fn): + """doc""" + func = functools.partial(_map_func, fn=fn) + return self.apply(func) + + def filter(self, fn): + """doc""" + func = functools.partial(_filter_func, fn=fn) + return self.apply(func) + + def shard(self, num_shards, index): + """doc""" + func = functools.partial(_shard_func, num_shards=num_shards, index=index) + return self.apply(func) + + def interleave(self, map_fn, cycle_length, block_length): + """doc""" + func = functools.partial(_interleave_func, map_fn=map_fn, cycle_length=cycle_length, block_length=block_length) + return self.apply(func) + + def batch(self, batch_size): + func = functools.partial(_batch_func, batch_size=batch_size) + return self.apply(func) + + def padded_batch(self, batch_size, pad_value=0, max_seqlen=None): + """doc""" + func = functools.partial(_padded_batch_func, batch_size=batch_size, pad_value=pad_value, max_seqlen=max_seqlen) + return self.apply(func) + + def take(self, count=1): + """doc""" + func = functools.partial(_take_func, count=count) + return self.apply(func) + + def buffered(self, size=10): + """doc""" + func = functools.partial(_buffered_func, size=size) + return self.apply(func) + + def chain(self, other): + func = functools.partial(_chain_func, dataset2=other.generator) + return self.apply(func) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/__init__.py 
b/modules/text/text_generation/ernie_gen/propeller/paddle/__init__.py new file mode 100644 index 00000000..c35000cd --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/__init__.py @@ -0,0 +1,51 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +doc +""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import six +import logging + +log = logging.getLogger(__name__) + + +def enable_textone(): + try: + import textone + except ImportError: + log.fatal('enable textone failed: textone not found!') + raise + global textone_enabled + log.info('textone enabled') + from ernie_gen.propeller.paddle.train.monitored_executor import MonitoredExecutor, TextoneTrainer + if TextoneTrainer is None: + raise RuntimeError('enable textone failed: textone not found!') + MonitoredExecutor.saver_class = TextoneTrainer + + +from ernie_gen.propeller.types import * +from ernie_gen.propeller.util import ArgumentParser, parse_hparam, parse_runconfig, parse_file + +from ernie_gen.propeller.paddle import data +from ernie_gen.propeller.paddle import train +from ernie_gen.propeller.paddle.train import * + +import paddle +paddle_version = [int(i) for i in paddle.__version__.split('.')] +if paddle_version[1] < 7: + raise RuntimeError('propeller 0.2 requires paddle 1.7+, got %s' % paddle.__version__) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/collection.py b/modules/text/text_generation/ernie_gen/propeller/paddle/collection.py new file mode 100644 index 00000000..8b85b37f --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/collection.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
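A minimal pipeline sketch of the functional Dataset API defined in propeller/data/functional.py above, assuming ernie_gen is importable; the values and single-field layout are invented for illustration:

import numpy as np
from ernie_gen.propeller.data.functional import Dataset

# Each element is a one-field tuple so that padded_batch can transpose fields into columns.
ds = Dataset.from_list([(np.array([1, 2]),), (np.array([3, 4, 5]),), (np.array([6]),)])
ds = ds.shuffle(buffer_size=3)                    # buffered shuffle (_shuffle_func)
ds = ds.padded_batch(batch_size=2, pad_value=0)   # pad ids to the longest item in each batch

for ids, in ds:                                   # __iter__ drives the chained generators
    print(ids.shape)                              # each batch is padded along the sequence axis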
+"""global collections""" + +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import sys + +_global_collection = None + + +class Key(object): + """predefine collection keys""" + SUMMARY_SCALAR = 1 + SUMMARY_HISTOGRAM = 2 + SKIP_OPTIMIZE = 3 + + +class Collections(object): + """global collections to record everything""" + + def __init__(self): + self.col = {} + + def __enter__(self): + global _global_collection + _global_collection = self + return self + + def __exit__(self, err_type, err_value, trace): + global _global_collection + _global_collection = None + + def add(self, key, val): + """doc""" + self.col.setdefault(key, []).append(val) + + def get(self, key): + """doc""" + return self.col.get(key, None) + + +def default_collection(): + """return global collection""" + global _global_collection + if _global_collection is None: + _global_collection = Collections() + return _global_collection diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/data/__init__.py b/modules/text/text_generation/ernie_gen/propeller/paddle/data/__init__.py new file mode 100644 index 00000000..615cdb76 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/data/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +doc +""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +from ernie_gen.propeller.paddle.data.functional import * +from ernie_gen.propeller.paddle.data.feature_column import * diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto b/modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto new file mode 100644 index 00000000..3c613917 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto @@ -0,0 +1,29 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Protocol messages for describing input data Examples for machine learning +// model training or inference. 
+syntax = "proto3"; + +import "ernie_gen.propeller/paddle/data/feature.proto"; +package ernie_gen.propeller; + +message Example { + Features features = 1; +}; + +message SequenceExample { + Features context = 1; + FeatureLists feature_lists = 2; +}; diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/data/example_pb2.py b/modules/text/text_generation/ernie_gen/propeller/paddle/data/example_pb2.py new file mode 100644 index 00000000..bd3cb4b1 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/data/example_pb2.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: propeller/paddle/data/example.proto + +import sys +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +from ernie_gen.propeller.paddle.data import feature_pb2 as propeller_dot_paddle_dot_data_dot_feature__pb2 + +DESCRIPTOR = _descriptor.FileDescriptor( + name='propeller/paddle/data/example.proto', + package='propeller', + syntax='proto3', + serialized_options=None, + serialized_pb=_b( + '\n#propeller/paddle/data/example.proto\x12\tpropeller\x1a#propeller/paddle/data/feature.proto\"0\n\x07\x45xample\x12%\n\x08\x66\x65\x61tures\x18\x01 \x01(\x0b\x32\x13.propeller.Features\"g\n\x0fSequenceExample\x12$\n\x07\x63ontext\x18\x01 \x01(\x0b\x32\x13.propeller.Features\x12.\n\rfeature_lists\x18\x02 \x01(\x0b\x32\x17.propeller.FeatureListsb\x06proto3' + ), + dependencies=[ + propeller_dot_paddle_dot_data_dot_feature__pb2.DESCRIPTOR, + ]) + +_EXAMPLE = _descriptor.Descriptor( + name='Example', + full_name='propeller.Example', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='features', + full_name='propeller.Example.features', + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=87, + serialized_end=135, +) + +_SEQUENCEEXAMPLE = _descriptor.Descriptor( + name='SequenceExample', + full_name='propeller.SequenceExample', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='context', + full_name='propeller.SequenceExample.context', + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='feature_lists', + full_name='propeller.SequenceExample.feature_lists', + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + 
serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=137, + serialized_end=240, +) + +_EXAMPLE.fields_by_name['features'].message_type = propeller_dot_paddle_dot_data_dot_feature__pb2._FEATURES +_SEQUENCEEXAMPLE.fields_by_name['context'].message_type = propeller_dot_paddle_dot_data_dot_feature__pb2._FEATURES +_SEQUENCEEXAMPLE.fields_by_name[ + 'feature_lists'].message_type = propeller_dot_paddle_dot_data_dot_feature__pb2._FEATURELISTS +DESCRIPTOR.message_types_by_name['Example'] = _EXAMPLE +DESCRIPTOR.message_types_by_name['SequenceExample'] = _SEQUENCEEXAMPLE +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Example = _reflection.GeneratedProtocolMessageType( + 'Example', + (_message.Message, ), + dict( + DESCRIPTOR=_EXAMPLE, + __module__='propeller.paddle.data.example_pb2' + # @@protoc_insertion_point(class_scope:propeller.Example) + )) +_sym_db.RegisterMessage(Example) + +SequenceExample = _reflection.GeneratedProtocolMessageType( + 'SequenceExample', + (_message.Message, ), + dict( + DESCRIPTOR=_SEQUENCEEXAMPLE, + __module__='propeller.paddle.data.example_pb2' + # @@protoc_insertion_point(class_scope:propeller.SequenceExample) + )) +_sym_db.RegisterMessage(SequenceExample) + +# @@protoc_insertion_point(module_scope) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto b/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto new file mode 100644 index 00000000..aa0f2dbc --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto @@ -0,0 +1,46 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package ernie_gen.propeller; + +message BytesList { + repeated bytes value = 1; +} +message FloatList { + repeated float value = 1 [packed = true]; +} +message Int64List { + repeated int64 value = 1 [packed = true]; +} + +message Feature { + oneof kind { + BytesList bytes_list = 1; + FloatList float_list = 2; + Int64List int64_list = 3; + } +}; + +message Features { + map feature = 1; +}; + +message FeatureList { + repeated Feature feature = 1; +}; + +message FeatureLists { + map feature_list = 1; +}; diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py b/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py new file mode 100644 index 00000000..b81937f5 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py @@ -0,0 +1,436 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""FeatureColumns and many Column""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import os +import sys +import struct +from six.moves import zip, map +import itertools +import gzip +from functools import partial +import six +import logging + +import numpy as np +from glob import glob +from ernie_gen.propeller.paddle.train import distribution + +from ernie_gen.propeller.data.functional import _interleave_func +from ernie_gen.propeller.paddle.data.functional import Dataset +from ernie_gen.propeller.paddle.data import example_pb2, feature_pb2 +import multiprocessing + +log = logging.getLogger(__name__) + +__all__ = ['FeatureColumns', 'TextColumn', 'TextIDColumn', 'LabelColumn', 'RawBytesColumn', 'basic_tokenizer', 'Column'] + + +def basic_tokenizer(sen): + """doc""" + seg = sen.split(b' ') + seg = filter(lambda i: i != b' ', seg) + return seg + + +class Column(object): + """doc""" + + def __init__(self, name): + """doc""" + pass + + def raw_to_proto(self, raw): + """doc""" + return feature_pb2.Feature() + + @property + def output_shapes(self): + """doc""" + pass + + @property + def output_types(self): + """doc""" + pass + + def proto_to_instance(self, proto): + """doc""" + raise NotImplementedError() + + def raw_to_instance(self, raw): + """doc""" + raise NotImplementedError() + + +class LabelColumn(Column): + """doc""" + + def __init__(self, name, vocab_dict=None, vocab_file=None): + """doc""" + self.name = name + self.vocab = None + if vocab_file: + self.vocab = {j.strip(): i for i, j in enumerate(open(vocab_file, 'rb').readlines())} + if vocab_dict: + self.vocab = vocab_dict + + @property + def output_shapes(self): + """doc""" + return [1] + + @property + def output_types(self): + """doc""" + return 'int64' + + def raw_to_proto(self, raw): + """doc""" + if self.vocab is None: + ids = [int(raw)] + else: + ids = [self.vocab[raw]] + fe = feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=ids)) + return fe + + def proto_to_instance(self, feature): + """doc""" + ret = np.array(feature.int64_list.value[0], dtype=np.int64) + return ret + + def raw_to_instance(self, raw): + """doc""" + if self.vocab is None: + ids = int(raw) + else: + ids = self.vocab[raw] + return np.array(ids, dtype=np.int64) + + +class TextColumn(Column): + """doc""" + + def __init__(self, name, unk_id, vocab_file=None, vocab_dict=None, tokenizer=basic_tokenizer): + self.name = name + self.tokenizer = tokenizer + self.unk_id = unk_id + if not (vocab_file or vocab_dict): + raise ValueError('at least specify vocab_file or vocab_dict') + if vocab_file: + self.vocab = {j.strip(): i for i, j in enumerate(open(vocab_file, 'rb').readlines())} + if vocab_dict: + self.vocab = vocab_dict + + @property + def output_shapes(self): + """doc""" + return [-1] + + @property + def output_types(self): + """doc""" + return 'int64' + + def raw_to_proto(self, raw): + """doc""" + ids = [s if isinstance(s, int) else self.vocab.get(s, self.unk_id) for s in self.tokenizer(raw)] + fe = feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=ids)) + return 
fe + + def proto_to_instance(self, feature): + """doc""" + ret = np.array(feature.int64_list.value, dtype=np.int64) + return ret + + def raw_to_instance(self, raw): + """doc""" + ids = [s if isinstance(s, int) else self.vocab.get(s, self.unk_id) for s in self.tokenizer(raw)] + return np.array(ids, dtype=np.int64) + + +class RawBytesColumn(Column): + def __init__(self, name): + self.name = name + + @property + def output_shapes(self): + """doc""" + return [-1] + + @property + def output_types(self): + """doc""" + return 'bytes' + + # def raw_to_proto(self, raw): + # """doc""" + # fe = feature_pb2.Feature(bytes_list=BytesList(value=[raw])) + # return fe + + def proto_to_instance(self, feature): + """doc""" + ret = feature.bytes_list.value[0] #np.array(feature.int64_list.value, dtype=np.int64) + return ret + + def raw_to_instance(self, raw): + """doc""" + return raw + + +class TextIDColumn(Column): + """doc""" + + def __init__(self, name): + """doc""" + self.name = name + + @property + def output_shapes(self): + """doc""" + return [-1] + + @property + def output_types(self): + """doc""" + return 'int64' + + def raw_to_proto(self, raw): + """doc""" + ids = [int(s) for s in raw.split(b' ')] + fe = feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=ids)) + return fe + + def proto_to_instance(self, feature): + """doc""" + ret = np.array(feature.int64_list.value, dtype=np.int64) + return ret + + def raw_to_instance(self, raw): + """doc""" + ret = np.array([int(i) for i in raw.split(b' ')], dtype=np.int64) + return ret + + +def _list_files(raw_dir): + return [os.path.join(raw_dir, p) for p in os.listdir(raw_dir)] + + +_columns = None + + +def _init_worker(col): + global _columns + _columns = col + + +def _worker_entrence(args): + args = (_columns, ) + args + return _make_gz(args) + + +class FeatureColumns(object): + """A Dataset Factory object""" + + def __init__(self, columns): + """doc""" + self._columns = columns + + def _make_gz_dataset(self, raw_dir, gz_dir): + assert raw_dir or gz_dir, 'data_dir not specified when using gz mode' + if raw_dir is not None: + assert os.path.exists(raw_dir), 'raw_dir not exists: %s' % raw_dir + raw_file = os.listdir(raw_dir) + if gz_dir is None: + gz_dir = '%s_gz' % raw_dir.rstrip('/') + + if not os.path.exists(gz_dir): + os.mkdir(gz_dir) + + if raw_dir is not None: + if len(raw_file) != 0: + log.debug('try making gz') + pool = multiprocessing.Pool(initializer=_init_worker, initargs=(self._columns, )) + args = [(os.path.join(raw_dir, f), os.path.join(gz_dir, f), b'\t') for f in raw_file] + pool.map(_worker_entrence, args) + pool.close() + pool.join() + else: + assert len( + os.listdir(gz_dir)) != 0, 'cant find gz file or raw-txt file at [%s] and [%s]' % (raw_dir, gz_dir) + return gz_dir + + def _read_gz_dataset(self, gz_files, shuffle=False, repeat=True, shard=False, **kwargs): + if len(gz_files) == 0: + raise ValueError('reading gz from empty file list: %s' % gz_files) + log.info('reading gz from %s' % '\n'.join(gz_files)) + dataset = Dataset.from_list(gz_files) + if repeat: + dataset = dataset.repeat() + + # if shard and distribution.status.mode == distribution.DistributionMode.NCCL: + # log.info('Apply dataset sharding in distribution env') + # train_ds = train_ds.shard(distribution.status.num_replica, + # distribution.status.replica_id) + + if shuffle: + dataset = dataset.shuffle(buffer_size=len(gz_files)) + fn = partial( + _interleave_func, + map_fn=lambda filename: Dataset.from_record_file(filename), + cycle_length=len(gz_files), + block_length=1) + 
dataset = dataset.apply(fn) + if shuffle: + dataset = dataset.shuffle(buffer_size=1000) + + def _parse_gz(record_str): # function that takes python_str as input + ex = example_pb2.Example() + ex.ParseFromString(record_str) + ret = [] + fea_dict = ex.features.feature + for c in self._columns: + ins = c.proto_to_instance(fea_dict[c.name]) + ret.append(ins) + return ret + + dataset = dataset.map(_parse_gz) + return dataset + + def _read_txt_dataset(self, data_files, shuffle=False, repeat=True, **kwargs): + log.info('reading raw files from %s' % '\n'.join(data_files)) + dataset = Dataset.from_list(data_files) + if repeat: + dataset = dataset.repeat() + if shuffle: + dataset = dataset.shuffle(buffer_size=len(data_files)) + + fn = partial( + _interleave_func, + map_fn=lambda filename: Dataset.from_file(filename), + cycle_length=len(data_files), + block_length=1) + dataset = dataset.apply(fn) + if shuffle: + dataset = dataset.shuffle(buffer_size=1000) + + def _parse_txt_file(record_str): # function that takes python_str as input + features = record_str.strip(b'\n').split(b'\t') + ret = [column.raw_to_instance(feature) for feature, column in zip(features, self._columns)] + return ret + + dataset = dataset.map(_parse_txt_file) + return dataset + + def _read_stdin_dataset(self, encoding='utf8', shuffle=False, **kwargs): + log.info('reading raw files stdin') + + def _gen(): + if six.PY3: + source = sys.stdin.buffer + else: + source = sys.stdin + while True: + line = source.readline() + if len(line) == 0: + break + yield line, + + dataset = Dataset.from_generator_func(_gen) + if shuffle: + dataset = dataset.shuffle(buffer_size=1000) + + def _parse_stdin(record_str): + """function that takes python_str as input""" + features = record_str.strip(b'\n').split(b'\t') + ret = [column.raw_to_instance(feature) for feature, column in zip(features, self._columns)] + return ret + + dataset = dataset.map(_parse_stdin) + return dataset + + def _prepare_dataset(self, + dataset, + map_func_before_batch=None, + map_func_after_batch=None, + shuffle_buffer_size=None, + batch_size=1, + pad_id=0, + prefetch=None, + **kwargs): + + if map_func_before_batch is not None: + dataset = dataset.map(map_func_before_batch) + if batch_size: + dataset = dataset.padded_batch(batch_size, pad_id) + if map_func_after_batch is not None: + dataset = dataset.map(map_func_after_batch) + return dataset + + def build_dataset(self, name, use_gz=True, data_dir=None, gz_dir=None, data_file=None, **kwargs): + """ + build `Dataset` from `data_dir` or `data_file` + if `use_gz`, will try to convert data_files to gz format and save to `gz_dir`, if `gz_dir` not given, will create one. 
+ """ + if use_gz: + gz_dir = self._make_gz_dataset(data_dir, gz_dir) + gz_files = _list_files(gz_dir) if gz_dir is not None else gz_dir + ds = self._read_gz_dataset(gz_files, **kwargs) + else: + if data_dir is not None: + data_files = _list_files(data_dir) + elif data_file is not None: + data_files = [data_file] + else: + raise ValueError('data_dir or data_files not specified') + ds = self._read_txt_dataset(data_files, **kwargs) + ds.name = name + return ds + + def build_dataset_from_stdin(self, name, **kwargs): + """doc""" + ds = self._read_stdin_dataset(**kwargs) + ds.name = name + return ds + + +def _make_gz(args): + try: + columns, from_file, to_file, sep = args + if os.path.exists(to_file): + return + with open(from_file, 'rb') as fin, gzip.open(to_file, 'wb') as fout: + log.debug('making gz %s => %s' % (from_file, to_file)) + for i, line in enumerate(fin): + line = line.strip(b'\n').split(sep) + #if i % 10000 == 0: + # log.debug('making gz %s => %s [%d]' % (from_file, to_file, i)) + if len(line) != len(columns): + log.error('columns not match at %s, got %d, expect %d' % (from_file, len(line), len(columns))) + continue + features = {} + for l, c in zip(line, columns): + features[c.name] = c.raw_to_proto(l) + example = example_pb2.Example(features=feature_pb2.Features(feature=features)) + serialized = example.SerializeToString() + l = len(serialized) + data = struct.pack('i%ds' % l, l, serialized) + fout.write(data) + log.debug('done making gz %s => %s' % (from_file, to_file)) + except Exception as e: + log.exception(e) + raise e diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_pb2.py b/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_pb2.py new file mode 100644 index 00000000..21a96379 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_pb2.py @@ -0,0 +1,549 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: propeller/paddle/data/feature.proto + +import sys +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor.FileDescriptor( + name='propeller/paddle/data/feature.proto', + package='propeller', + syntax='proto3', + serialized_options=None, + serialized_pb=_b( + '\n#propeller/paddle/data/feature.proto\x12\tpropeller\"\x1a\n\tBytesList\x12\r\n\x05value\x18\x01 \x03(\x0c\"\x1e\n\tFloatList\x12\x11\n\x05value\x18\x01 \x03(\x02\x42\x02\x10\x01\"\x1e\n\tInt64List\x12\x11\n\x05value\x18\x01 \x03(\x03\x42\x02\x10\x01\"\x95\x01\n\x07\x46\x65\x61ture\x12*\n\nbytes_list\x18\x01 \x01(\x0b\x32\x14.propeller.BytesListH\x00\x12*\n\nfloat_list\x18\x02 \x01(\x0b\x32\x14.propeller.FloatListH\x00\x12*\n\nint64_list\x18\x03 \x01(\x0b\x32\x14.propeller.Int64ListH\x00\x42\x06\n\x04kind\"\x81\x01\n\x08\x46\x65\x61tures\x12\x31\n\x07\x66\x65\x61ture\x18\x01 \x03(\x0b\x32 .propeller.Features.FeatureEntry\x1a\x42\n\x0c\x46\x65\x61tureEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.propeller.Feature:\x02\x38\x01\"2\n\x0b\x46\x65\x61tureList\x12#\n\x07\x66\x65\x61ture\x18\x01 \x03(\x0b\x32\x12.propeller.Feature\"\x9a\x01\n\x0c\x46\x65\x61tureLists\x12>\n\x0c\x66\x65\x61ture_list\x18\x01 \x03(\x0b\x32(.propeller.FeatureLists.FeatureListEntry\x1aJ\n\x10\x46\x65\x61tureListEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.propeller.FeatureList:\x02\x38\x01\x62\x06proto3' + )) + +_BYTESLIST = _descriptor.Descriptor( + name='BytesList', + full_name='propeller.BytesList', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='value', + full_name='propeller.BytesList.value', + index=0, + number=1, + type=12, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=50, + serialized_end=76, +) + +_FLOATLIST = _descriptor.Descriptor( + name='FloatList', + full_name='propeller.FloatList', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='value', + full_name='propeller.FloatList.value', + index=0, + number=1, + type=2, + cpp_type=6, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b('\020\001'), + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=78, + serialized_end=108, +) + +_INT64LIST = _descriptor.Descriptor( + name='Int64List', + full_name='propeller.Int64List', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='value', + full_name='propeller.Int64List.value', + index=0, + number=1, + type=3, + 
cpp_type=2, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b('\020\001'), + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=110, + serialized_end=140, +) + +_FEATURE = _descriptor.Descriptor( + name='Feature', + full_name='propeller.Feature', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='bytes_list', + full_name='propeller.Feature.bytes_list', + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='float_list', + full_name='propeller.Feature.float_list', + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='int64_list', + full_name='propeller.Feature.int64_list', + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='kind', full_name='propeller.Feature.kind', index=0, containing_type=None, fields=[]), + ], + serialized_start=143, + serialized_end=292, +) + +_FEATURES_FEATUREENTRY = _descriptor.Descriptor( + name='FeatureEntry', + full_name='propeller.Features.FeatureEntry', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='key', + full_name='propeller.Features.FeatureEntry.key', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='value', + full_name='propeller.Features.FeatureEntry.value', + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=_b('8\001'), + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=358, + serialized_end=424, +) + +_FEATURES = _descriptor.Descriptor( + name='Features', + full_name='propeller.Features', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='feature', + full_name='propeller.Features.feature', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + 
message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[ + _FEATURES_FEATUREENTRY, + ], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=295, + serialized_end=424, +) + +_FEATURELIST = _descriptor.Descriptor( + name='FeatureList', + full_name='propeller.FeatureList', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='feature', + full_name='propeller.FeatureList.feature', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=426, + serialized_end=476, +) + +_FEATURELISTS_FEATURELISTENTRY = _descriptor.Descriptor( + name='FeatureListEntry', + full_name='propeller.FeatureLists.FeatureListEntry', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='key', + full_name='propeller.FeatureLists.FeatureListEntry.key', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='value', + full_name='propeller.FeatureLists.FeatureListEntry.value', + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=_b('8\001'), + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=559, + serialized_end=633, +) + +_FEATURELISTS = _descriptor.Descriptor( + name='FeatureLists', + full_name='propeller.FeatureLists', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='feature_list', + full_name='propeller.FeatureLists.feature_list', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[ + _FEATURELISTS_FEATURELISTENTRY, + ], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=479, + serialized_end=633, +) + +_FEATURE.fields_by_name['bytes_list'].message_type = _BYTESLIST +_FEATURE.fields_by_name['float_list'].message_type = _FLOATLIST +_FEATURE.fields_by_name['int64_list'].message_type = _INT64LIST +_FEATURE.oneofs_by_name['kind'].fields.append(_FEATURE.fields_by_name['bytes_list']) +_FEATURE.fields_by_name['bytes_list'].containing_oneof = _FEATURE.oneofs_by_name['kind'] 
+_FEATURE.oneofs_by_name['kind'].fields.append(_FEATURE.fields_by_name['float_list']) +_FEATURE.fields_by_name['float_list'].containing_oneof = _FEATURE.oneofs_by_name['kind'] +_FEATURE.oneofs_by_name['kind'].fields.append(_FEATURE.fields_by_name['int64_list']) +_FEATURE.fields_by_name['int64_list'].containing_oneof = _FEATURE.oneofs_by_name['kind'] +_FEATURES_FEATUREENTRY.fields_by_name['value'].message_type = _FEATURE +_FEATURES_FEATUREENTRY.containing_type = _FEATURES +_FEATURES.fields_by_name['feature'].message_type = _FEATURES_FEATUREENTRY +_FEATURELIST.fields_by_name['feature'].message_type = _FEATURE +_FEATURELISTS_FEATURELISTENTRY.fields_by_name['value'].message_type = _FEATURELIST +_FEATURELISTS_FEATURELISTENTRY.containing_type = _FEATURELISTS +_FEATURELISTS.fields_by_name['feature_list'].message_type = _FEATURELISTS_FEATURELISTENTRY +DESCRIPTOR.message_types_by_name['BytesList'] = _BYTESLIST +DESCRIPTOR.message_types_by_name['FloatList'] = _FLOATLIST +DESCRIPTOR.message_types_by_name['Int64List'] = _INT64LIST +DESCRIPTOR.message_types_by_name['Feature'] = _FEATURE +DESCRIPTOR.message_types_by_name['Features'] = _FEATURES +DESCRIPTOR.message_types_by_name['FeatureList'] = _FEATURELIST +DESCRIPTOR.message_types_by_name['FeatureLists'] = _FEATURELISTS +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +BytesList = _reflection.GeneratedProtocolMessageType( + 'BytesList', + (_message.Message, ), + dict( + DESCRIPTOR=_BYTESLIST, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.BytesList) + )) +_sym_db.RegisterMessage(BytesList) + +FloatList = _reflection.GeneratedProtocolMessageType( + 'FloatList', + (_message.Message, ), + dict( + DESCRIPTOR=_FLOATLIST, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.FloatList) + )) +_sym_db.RegisterMessage(FloatList) + +Int64List = _reflection.GeneratedProtocolMessageType( + 'Int64List', + (_message.Message, ), + dict( + DESCRIPTOR=_INT64LIST, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.Int64List) + )) +_sym_db.RegisterMessage(Int64List) + +Feature = _reflection.GeneratedProtocolMessageType( + 'Feature', + (_message.Message, ), + dict( + DESCRIPTOR=_FEATURE, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.Feature) + )) +_sym_db.RegisterMessage(Feature) + +Features = _reflection.GeneratedProtocolMessageType( + 'Features', + (_message.Message, ), + dict( + FeatureEntry=_reflection.GeneratedProtocolMessageType( + 'FeatureEntry', + (_message.Message, ), + dict( + DESCRIPTOR=_FEATURES_FEATUREENTRY, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.Features.FeatureEntry) + )), + DESCRIPTOR=_FEATURES, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.Features) + )) +_sym_db.RegisterMessage(Features) +_sym_db.RegisterMessage(Features.FeatureEntry) + +FeatureList = _reflection.GeneratedProtocolMessageType( + 'FeatureList', + (_message.Message, ), + dict( + DESCRIPTOR=_FEATURELIST, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.FeatureList) + )) +_sym_db.RegisterMessage(FeatureList) + +FeatureLists = _reflection.GeneratedProtocolMessageType( + 'FeatureLists', + (_message.Message, ), + dict( + FeatureListEntry=_reflection.GeneratedProtocolMessageType( + 'FeatureListEntry', + (_message.Message, ), + dict( + 
DESCRIPTOR=_FEATURELISTS_FEATURELISTENTRY, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.FeatureLists.FeatureListEntry) + )), + DESCRIPTOR=_FEATURELISTS, + __module__='propeller.paddle.data.feature_pb2' + # @@protoc_insertion_point(class_scope:propeller.FeatureLists) + )) +_sym_db.RegisterMessage(FeatureLists) +_sym_db.RegisterMessage(FeatureLists.FeatureListEntry) + +_FLOATLIST.fields_by_name['value']._options = None +_INT64LIST.fields_by_name['value']._options = None +_FEATURES_FEATUREENTRY._options = None +_FEATURELISTS_FEATURELISTENTRY._options = None +# @@protoc_insertion_point(module_scope) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/data/functional.py b/modules/text/text_generation/ernie_gen/propeller/paddle/data/functional.py new file mode 100644 index 00000000..9b104245 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/data/functional.py @@ -0,0 +1,66 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Pyreader based Dataset""" + +import sys +import numpy as np +import logging + +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen.propeller.data.functional import Dataset as DatasetBase + +log = logging.getLogger(__name__) + + +class Dataset(DatasetBase): + """Pyreader based Dataset""" + + def placeholders(self): + """doc""" + if self.name is None: + raise ValueError('can not get feature from unnamed Dataset') + + ret = [] + for i, (shape, types) in enumerate(zip(self.data_shapes, self.data_types)): + ret.append(L.data('%s_placeholder_%d' % (self.name, i), shape=shape, append_batch_size=False, dtype=types)) + return ret + + def features(self): + """start point of net building. call this in a program scope""" + if self.name is None: + raise ValueError('can not get feature from unnamed Dataset') + + if len(self.data_shapes) != len(self.data_types): + raise ValueError('Dataset shapes and types not match: shape:%s types%s' % (repr(self._data_shapes), + repr(self._data_types))) + return self.placeholders() + + def start(self, places=None): + """start Pyreader""" + if places is None: + places = F.cuda_places() if F.core.is_compiled_with_cuda() else F.cpu_places() + #assert self.pyreader is not None, 'use Dataset.features to build net first, then start dataset' + def _gen(): + try: + for idx, i in enumerate(self.generator()): + yield i + except Exception as e: + log.exception(e) + raise e + + r = F.io.PyReader(feed_list=self.placeholders(), capacity=50, iterable=True, return_list=F.in_dygraph_mode()) + r.decorate_batch_generator(_gen, places=places) + return r() diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/summary.py b/modules/text/text_generation/ernie_gen/propeller/paddle/summary.py new file mode 100644 index 00000000..af1fc644 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/summary.py @@ -0,0 +1,37 @@ +# Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""record summary tensor in a collection scope""" + +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import sys + +import paddle.fluid as F +from ernie_gen.propeller.paddle.collection import default_collection, Key + + +def scalar(name, tensor): + """scalar summary""" + if not isinstance(tensor, F.framework.Variable): + raise ValueError('expect paddle Variable, got %s' % repr(tensor)) + default_collection().add(Key.SUMMARY_SCALAR, (name, tensor)) + + +def histogram(name, tensor): + """histogram summary""" + if not isinstance(tensor, F.framework.Variable): + raise ValueError('expect paddle Variable, got %s' % repr(tensor)) + default_collection().add(Key.SUMMARY_HISTOGRAM, (name, tensor)) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/train/__init__.py b/modules/text/text_generation/ernie_gen/propeller/paddle/train/__init__.py new file mode 100644 index 00000000..b7867c92 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/train/__init__.py @@ -0,0 +1,33 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Propeller training""" + +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import os +import sys +import logging +from time import time + +log = logging.getLogger(__name__) + +from ernie_gen.propeller.paddle.train.monitored_executor import * +from ernie_gen.propeller.paddle.train.trainer import * +from ernie_gen.propeller.paddle.train.hooks import * +from ernie_gen.propeller.train.model import Model +from ernie_gen.propeller.paddle.train import exporter +from ernie_gen.propeller.paddle.train import distribution +from ernie_gen.propeller.paddle.train import metrics diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/train/distribution.py b/modules/text/text_generation/ernie_gen/propeller/paddle/train/distribution.py new file mode 100644 index 00000000..c1ccaf4a --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/train/distribution.py @@ -0,0 +1,159 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import functools +import six +import os +import logging +from time import sleep + +import paddle.fluid as F +import paddle.fluid.layers as L + +log = logging.getLogger(__name__) +import ernie_gen.propeller.util + +__all__ = ['init_distribuition_env', 'status'] + +status = None + + +class DistributionMode(object): + LOCAL = 0 + NCCL = 1 + + +class DistributionStatus(object): + def __init__(self, config): + if config is None: + self._mode = DistributionMode.LOCAL + self._env = None + self._this = None + else: + try: + self._mode = DistributionMode.NCCL + + cluster = config['cluster'] + task = config['task']['type'] + idx = int(config['task']['index']) + self._this = cluster[task][idx] + + self._env = cluster['chief'] + cluster.get('worker', []) + if len(set(self._env)) != len(self._env): + raise ValueError('duplicate host in dis_config %s' % config) + + except KeyError as e: + raise ValueError('PROPELLER_DISCONFIG wrong: %s not found in %s' % (e, repr(config))) + + @property + def mode(self): + return self._mode + + @property + def num_replica(self): + if self._mode == DistributionMode.LOCAL: + return 1 + elif self._mode == DistributionMode.NCCL: + return len(self._env) + else: + raise ValueError('Got unknow distribution mode %s' % repr(self._mode)) + + @property + def replica_id(self): + if self._mode == DistributionMode.LOCAL: + return 0 + elif self._mode == DistributionMode.NCCL: + return self._env.index(self._this) + else: + raise ValueError('Got unknow distribution mode %s' % repr(self._mode)) + + @property + def is_master(self): + if self._mode == DistributionMode.LOCAL: + return True + elif self._mode == DistributionMode.NCCL: + return self.replica_id == 0 + else: + raise ValueError('got unknow distribution mode %s' % repr(self._mode)) + + +def _get_paddlestype_disconfig(): + env = os.environ.copy() + if not ('PADDLE_TRAINER_ID' in env and 'PADDLE_CURRENT_ENDPOINT' in env and 'PADDLE_TRAINERS_NUM' in env + and 'PADDLE_TRAINER_ENDPOINTS' in env): + return None + else: + ip_port_list = env['PADDLE_TRAINER_ENDPOINTS'].split(',') + assert len(ip_port_list) == int(env['PADDLE_TRAINERS_NUM']) + ip_port_self = env['PADDLE_CURRENT_ENDPOINT'] + world = {"chief": [ip_port_list[0]]} + for ip_port in ip_port_list[1:]: + world.setdefault('worker', []).append(ip_port) + self_index = ip_port_list.index(ip_port_self) + self_type = 'chief' if self_index == 0 else 'worker' + if self_type == 'worker': + self_index -= 1 + env_dict = {'cluster': world, 'task': {'type': self_type, 'index': self_index}} + return env_dict + + +dis_config = ernie_gen.propeller.util._get_dict_from_environ_or_json_or_file(None, 'PROPELLER_DISCONFIG') +if dis_config is None: + log.debug('no PROPELLER_DISCONFIG found, try paddlestype setting') + dis_config = _get_paddlestype_disconfig() + if dis_config is None: + log.debug('no paddle stype setting found') +status = DistributionStatus(dis_config) + + +def run_on_master(func): + """skip function in distribution env""" + + @functools.wraps(func) + def 
f(*arg, **kwargs): + """f""" + if status is None: + raise ValueError('distribution mode unkown at this point') + if status.mode == DistributionMode.LOCAL: + r = func(*arg, **kwargs) + elif status.mode == DistributionMode.NCCL: + if status.is_master: + r = func(*arg, **kwargs) + else: + r = 0 # skip function + #MPI.COMM_WORLD.Barrier() + return r + + return f + + +def init_distribuition_env(program): + if status.mode == DistributionMode.LOCAL: + log.info('Initializing local training') + elif status.mode == DistributionMode.NCCL: + config = F.DistributeTranspilerConfig() + config.mode = "nccl2" + config.nccl_comm_num = 1 + F.DistributeTranspiler(config=config).transpile( + status.replica_id, + trainers=','.join(status._env), + current_endpoint=status._this, + program=program.train_program, + startup_program=program.startup_program) + log.info('Initializing distribution training with config %s' % (repr(dis_config))) + if status.is_master: + sleep(30) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/train/exporter.py b/modules/text/text_generation/ernie_gen/propeller/paddle/train/exporter.py new file mode 100644 index 00000000..cfdd60f4 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/train/exporter.py @@ -0,0 +1,154 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
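A minimal sketch of the distribution helpers above, assuming ernie_gen is importable; the endpoints and task assignment are invented. PROPELLER_DISCONFIG (or the paddle-style PADDLE_* variables) must be set before the propeller import and is parsed into the module-level DistributionStatus:

from ernie_gen.propeller.paddle.train import distribution

# Structure expected in the PROPELLER_DISCONFIG JSON (values are placeholders):
# {"cluster": {"chief": ["10.0.0.1:6170"], "worker": ["10.0.0.2:6170"]},
#  "task": {"type": "worker", "index": 0}}

@distribution.run_on_master
def save_checkpoint():
    # executed on every process in LOCAL mode, only on replica 0 in NCCL mode
    print('replica %d saving' % distribution.status.replica_id)

save_checkpoint()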
+""" +exporters +""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import sys +import os +import itertools +import six +import inspect +import abc +import logging + +import numpy as np +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen.propeller.util import map_structure +from ernie_gen.propeller.paddle.train import Saver +from ernie_gen.propeller.types import InferenceSpec +from ernie_gen.propeller.train.model import Model +from ernie_gen.propeller.paddle.train.trainer import _build_net +from ernie_gen.propeller.paddle.train.trainer import _build_model_fn +from ernie_gen.propeller.types import RunMode +from ernie_gen.propeller.types import ProgramPair + +log = logging.getLogger(__name__) + + +@six.add_metaclass(abc.ABCMeta) +class Exporter(object): + """base exporter""" + + @abc.abstractmethod + def export(self, exe, program, eval_result, state): + """export""" + raise NotImplementedError() + + +class BestExporter(Exporter): + """export saved model accordingto `cmp_fn`""" + + def __init__(self, export_dir, cmp_fn): + """doc""" + self._export_dir = export_dir + self._best = None + self.cmp_fn = cmp_fn + + def export(self, exe, program, eval_model_spec, eval_result, state): + """doc""" + log.debug('New evaluate result: %s \nold: %s' % (repr(eval_result), repr(self._best))) + if self._best is None and state['best_model'] is not None: + self._best = state['best_model'] + log.debug('restoring best state %s' % repr(self._best)) + if self._best is None or self.cmp_fn(old=self._best, new=eval_result): + log.debug('[Best Exporter]: export to %s' % self._export_dir) + eval_program = program.train_program + # FIXME: all eval datasets has same name/types/shapes now!!! 
so every eval program are the smae + + saver = Saver(self._export_dir, exe, program=program, max_ckpt_to_keep=1) + saver.save(state) + eval_result = map_structure(float, eval_result) + self._best = eval_result + state['best_model'] = eval_result + else: + log.debug('[Best Exporter]: skip step %s' % state.gstep) + + +class BestInferenceModelExporter(Exporter): + """export inference model accordingto `cmp_fn`""" + + def __init__(self, export_dir, cmp_fn, model_class_or_model_fn=None, hparams=None, dataset=None): + """doc""" + self._export_dir = export_dir + self._best = None + self.cmp_fn = cmp_fn + self.model_class_or_model_fn = model_class_or_model_fn + self.hparams = hparams + self.dataset = dataset + + def export(self, exe, program, eval_model_spec, eval_result, state): + """doc""" + if self.model_class_or_model_fn is not None and self.hparams is not None \ + and self.dataset is not None: + log.info('Building program by user defined model function') + if issubclass(self.model_class_or_model_fn, Model): + _model_fn = _build_model_fn(self.model_class_or_model_fn) + elif inspect.isfunction(self.model_class_or_model_fn): + _model_fn = self.model_class_or_model_fn + else: + raise ValueError('unknown model %s' % self.model_class_or_model_fn) + + # build net + infer_program = F.Program() + startup_prog = F.Program() + with F.program_guard(infer_program, startup_prog): + #share var with Train net + with F.unique_name.guard(): + log.info('Building Infer Graph') + infer_fea = self.dataset.features() + # run_config is None + self.model_spec = _build_net(_model_fn, infer_fea, RunMode.PREDICT, self.hparams, None) + log.info('Done') + infer_program = infer_program.clone(for_test=True) + self.program = ProgramPair(train_program=infer_program, startup_program=startup_prog) + + else: + self.program = program + self.model_spec = eval_model_spec + if self._best is None and state['best_inf_model'] is not None: + self._best = state['best_inf_model'] + log.debug('restoring best state %s' % repr(self._best)) + log.debug('New evaluate result: %s \nold: %s' % (repr(eval_result), repr(self._best))) + + if self._best is None or self.cmp_fn(old=self._best, new=eval_result): + log.debug('[Best Exporter]: export to %s' % self._export_dir) + if self.model_spec.inference_spec is None: + raise ValueError('model_fn didnt return InferenceSpec') + + inf_spec_dict = self.model_spec.inference_spec + if not isinstance(inf_spec_dict, dict): + inf_spec_dict = {'inference': inf_spec_dict} + for inf_spec_name, inf_spec in six.iteritems(inf_spec_dict): + if not isinstance(inf_spec, InferenceSpec): + raise ValueError('unknow inference spec type: %s' % inf_spec) + + save_dir = os.path.join(self._export_dir, inf_spec_name) + log.debug('[Best Exporter]: save inference model: "%s" to %s' % (inf_spec_name, save_dir)) + feed_var = [i.name for i in inf_spec.inputs] + fetch_var = inf_spec.outputs + + infer_program = self.program.train_program + startup_prog = F.Program() + F.io.save_inference_model(save_dir, feed_var, fetch_var, exe, main_program=infer_program) + eval_result = map_structure(float, eval_result) + state['best_inf_model'] = eval_result + self._best = eval_result + else: + log.debug('[Best Exporter]: skip step %s' % state.gstep) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py b/modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py new file mode 100644 index 00000000..4b17ea21 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py @@ -0,0 
+1,320 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""train hooks""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import sys +import six +import os +import itertools + +import numpy as np +import logging +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen.propeller import util +from ernie_gen.propeller.paddle.train import distribution +from ernie_gen.propeller.paddle.train.metrics import Metrics + +__all__ = [ + 'RunHook', 'TqdmProgressBarHook', 'TqdmNotebookProgressBarHook', 'CheckpointSaverHook', 'LoggingHook', + 'StopAtStepHook', 'EvalHook' +] + +log = logging.getLogger(__name__) + + +class RunHook(object): + """RunHook Base class""" + + def __init__(self): + """doc""" + pass + + def before_train(self, program): + """doc""" + pass + + def before_run(self, state): + """doc""" + return [] + + def after_run(self, res_list, state): + """doc""" + pass + + def should_stop(self, state): + """doc""" + return False + + def after_train(self): + """doc""" + pass + + +class TqdmProgressBarHook(RunHook): + """show a progress bar when training""" + + def __init__(self, max_steps, desc=None): + """doc""" + self.tqdm = None + import tqdm + from ernie_gen.propeller import log as main_log + hdl = main_log.handlers[0] + + class _TqdmLogginHandler(logging.Handler): + def emit(self, record): + """doc""" + try: + msg = self.format(record) + tqdm.tqdm.write(msg, file=sys.stderr) + self.flush() + except (KeyboardInterrupt, SystemExit) as e: + raise e + except: + self.handleError(record) + + tqdm_hdl = _TqdmLogginHandler() + tqdm_hdl.setFormatter(hdl.formatter) + main_log.removeHandler(hdl) + main_log.addHandler(tqdm_hdl) + self.tqdm = tqdm.tqdm(total=max_steps, desc=None) + + def before_run(self, state): + self.tqdm.n = state.gstep + return [] + + def __del__(self): + if self.tqdm: + self.tqdm.close() + + +class TqdmNotebookProgressBarHook(RunHook): + """show a progress bar when training""" + + def __init__(self, max_steps, desc=None): + """doc""" + self.tqdm = None + import tqdm + from ernie_gen.propeller import log as main_log + hdl = main_log.handlers[0] + + class _TqdmLogginHandler(logging.Handler): + def emit(self, record): + """doc""" + try: + msg = self.format(record) + tqdm.tqdm.write(msg, file=sys.stderr) + self.flush() + except (KeyboardInterrupt, SystemExit) as e: + raise e + except: + self.handleError(record) + + tqdm_hdl = _TqdmLogginHandler() + tqdm_hdl.setFormatter(hdl.formatter) + main_log.removeHandler(hdl) + main_log.addHandler(tqdm_hdl) + self.tqdm = tqdm.tqdm_notebook(total=max_steps, desc=None) + + def before_run(self, state): + """doc""" + self.tqdm.n = state.gstep + self.tqdm.refresh() + return [] + + def __del__(self): + """doc""" + if self.tqdm: + self.tqdm.close() + + +class LoggingHook(RunHook): + """log tensor in to screan and VisualDL""" + + def __init__(self, loss, per_step=10, skip_step=100, 
summary_writer=None, summary_record=None): + """doc""" + if per_step is None or skip_step is None: + raise ValueError('wrong step argument, per step: %d skip_step %d' % (per_step, skip_step)) + self.loss = loss + self.per_step = per_step + self.skip_step = skip_step + self.summary_record = summary_record + self.writer = summary_writer + self.last_state = None + + def before_train(self, program): + """doc""" + if self.summary_record: + if self.summary_record.scalar: + self.s_name, self.s_tolog = zip(*self.summary_record.scalar) + else: + self.s_name, self.s_tolog = [], [] + + if self.summary_record.histogram: + self.h_name, self.h_tolog = zip(*self.summary_record.histogram) + else: + self.h_name, self.h_tolog = [], [] + + def before_run(self, state): + """doc""" + if state.gstep % self.per_step == 0 and state.step > self.skip_step: + ret = [self.loss] + if self.summary_record: + ret += self.s_tolog + ret += self.h_tolog + return ret + else: + return [] + + def after_run(self, res_list, state): + """doc""" + if state.gstep % self.per_step == 0 and state.step > self.skip_step: + if not self.summary_record: + return + + loss = float(res_list[0]) + s_np = res_list[1:1 + len(self.s_name)] + h_np = res_list[1 + len(self.s_name):1 + len(self.s_name) + len(self.h_name)] + + if self.last_state is not None: + speed = (state.gstep - self.last_state.gstep) / (state.time - self.last_state.time) + else: + speed = -1. + self.last_state = state + + # log to VisualDL + if self.writer is not None: + self.writer.add_scalar('loss', loss, state.gstep) + for name, t in zip(self.s_name, s_np): + if np.isnan(t).any(): + log.warning('Nan summary: %s, skip' % name) + else: + self.writer.add_scalar(name, t, state.gstep) + + for name, t in zip(self.h_name, h_np): + if np.isnan(t).any(): + log.warning('Nan summary: %s, skip' % name) + else: + self.writer.add_histogram(name, t, state.gstep) + + if speed > 0.: + self.writer.add_scalar('global_step', speed, state.gstep) + + # log to stdout + log.debug('\t'.join([ + 'step: %d' % state.gstep, + 'steps/sec: %.5f' % speed, + 'loss: %.5f' % loss, + '' if self.summary_record is None else ' '.join(map(lambda t: '%s:%s' % t, zip(self.s_name, s_np))), + ])) + + +class StopAtStepHook(RunHook): + """stop training at some step""" + + def __init__(self, stop_global_step, stop_step): + """doc""" + self._stop_gstep = stop_global_step + self._stop_step = stop_step + + def should_stop(self, state): + """doc""" + if (self._stop_gstep and state.gstep >= self._stop_gstep) or \ + (self._stop_step and state.step >= self._stop_step): + log.info('StopAtStepHook called stop') + return True + else: + return False + + +class EvalHook(RunHook): + """hook this on a eval Executor""" + + def __init__(self, metrics, summary_writer=None): + """doc""" + self.writer = summary_writer + self._result = None + + if not isinstance(metrics, dict): + raise ValueError('metrics should be dict, got %s' % repr(metrics)) + + for k, m in six.iteritems(metrics): + if not isinstance(m, Metrics): + raise ValueError('metrics %s should be instance of propeller.Metrics, got %s' % (k, repr(m))) + + if len(metrics): + self.names = list(metrics.keys()) + self.metrics = list(metrics.values()) + else: + self.names, self.metrics = [], [] + + def before_train(self, program): + """doc""" + for m in self.metrics: + m.reset() + + def before_run(self, state): + """doc""" + ls = [m.tensor for m in self.metrics] + for i in ls: + if not (isinstance(i, list) or isinstance(i, tuple)): + raise ValueError('metrics should return tuple or list 
of tensors, got %s' % repr(i)) + for ii in i: + if not isinstance(ii, F.framework.Variable): + raise ValueError( + 'metrics tensor be propeller.train.Metrics, got %s of type %s' % (repr(ii), type(ii))) + ls_flt, self.schema = util.flatten(ls) + #log.debug(ls_flt) + return ls_flt + + def after_run(self, res_list, state): + """doc""" + res = util.unflatten(res_list, self.schema) + for r, m in zip(res, self.metrics): + m.update(r) + + @property + def result(self): + """doc""" + return self._result + + def after_train(self): + """doc""" + printable = [] + self._result = {} + for n, m in zip(self.names, self.metrics): + val = m.eval() + self._result[n] = val + + return self.result + + +class CheckpointSaverHook(RunHook): + """Save checkpoint every n step""" + + def __init__(self, saver, per_step=10, skip_step=100): + """doc""" + self.saver = saver + self.per_step = per_step + self.skip_step = skip_step + + def after_run(self, res_list, state): + """doc""" + if state.gstep % self.per_step == 0 and \ + state.step > self.skip_step: + self.saver.save(state) diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/train/metrics.py b/modules/text/text_generation/ernie_gen/propeller/paddle/train/metrics.py new file mode 100644 index 00000000..662c6657 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/train/metrics.py @@ -0,0 +1,666 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""predefined metrics""" + +import sys +import os +import six + +import numpy as np +import itertools +import logging + +import paddle.fluid as F +import paddle.fluid.layers as L +import sklearn.metrics + +log = logging.getLogger(__name__) + +__all__ = ['Metrics', 'F1', 'Recall', 'Precision', 'Mrr', 'Mean', 'Acc', 'ChunkF1', 'RecallAtPrecision'] + + +class Metrics(object): + """Metrics base class""" + + def __init__(self): + """doc""" + self.saver = [] + + @property + def tensor(self): + """doc""" + pass + + def update(self, *args): + """doc""" + pass + + def eval(self): + """doc""" + pass + + +class Mean(Metrics): + """doc""" + + def __init__(self, t): + """doc""" + self.t = t + self.reset() + + def reset(self): + """doc""" + self.saver = np.array([]) + + @property + def tensor(self): + """doc""" + return self.t, + + def update(self, args): + """doc""" + t, = args + t = t.reshape([-1]) + self.saver = np.concatenate([self.saver, t]) + + def eval(self): + """doc""" + return self.saver.mean() + + +class Ppl(Mean): + """doc""" + + def eval(self): + """doc""" + return np.exp(self.saver.mean()) + + +class Acc(Mean): + """doc""" + + def __init__(self, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + self.eq = L.equal(pred, label) + self.reset() + + @property + def tensor(self): + """doc""" + return self.eq, + + +class MSE(Mean): + """doc""" + + def __init__(self, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + diff = pred - label + self.mse = diff * diff + self.reset() + + @property + def tensor(self): + """doc""" + return self.mse, + + +class Cosine(Mean): + """doc""" + + def __init__(self, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + self.cos = L.cos_sim(label, pred) + self.reset() + + @property + def tensor(self): + """doc""" + return self.cos, + + +class MacroF1(Metrics): + """doc""" + + def __init__(self, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + self.label = label + self.pred = pred + self.reset() + + def reset(self): + """doc""" + self.label_saver = np.array([], dtype=np.bool) + self.pred_saver = np.array([], dtype=np.bool) + + @property + def tensor(self): + """doc""" + return self.label, self.pred + + def update(self, args): + """doc""" + label, pred = args + label = label.reshape([-1]).astype(np.bool) + pred = pred.reshape([-1]).astype(np.bool) + if label.shape != pred.shape: + raise ValueError('Metrics precesion: input not match: label:%s pred:%s' % (label, pred)) + self.label_saver = np.concatenate([self.label_saver, label]) + self.pred_saver = np.concatenate([self.pred_saver, pred]) + + def eval(self): + """doc""" + return sklearn.metrics.f1_score(self.label_saver, self.pred_saver, average='macro') + + +class Precision(Metrics): + """doc""" + + def __init__(self, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + self.label = label + self.pred = pred + self.reset() + + def reset(self): + """doc""" + 
self.label_saver = np.array([], dtype=np.bool) + self.pred_saver = np.array([], dtype=np.bool) + + @property + def tensor(self): + """doc""" + return self.label, self.pred + + def update(self, args): + """doc""" + label, pred = args + label = label.reshape([-1]).astype(np.bool) + pred = pred.reshape([-1]).astype(np.bool) + if label.shape != pred.shape: + raise ValueError('Metrics precesion: input not match: label:%s pred:%s' % (label, pred)) + self.label_saver = np.concatenate([self.label_saver, label]) + self.pred_saver = np.concatenate([self.pred_saver, pred]) + + def eval(self): + """doc""" + tp = (self.label_saver & self.pred_saver).astype(np.int64).sum() + p = self.pred_saver.astype(np.int64).sum() + return tp / p + + +class Recall(Precision): + """doc""" + + def eval(self): + """doc""" + tp = (self.label_saver & self.pred_saver).astype(np.int64).sum() + t = (self.label_saver).astype(np.int64).sum() + return tp / t + + +class F1(Precision): + """doc""" + + def eval(self): + """doc""" + tp = (self.label_saver & self.pred_saver).astype(np.int64).sum() + t = self.label_saver.astype(np.int64).sum() + p = self.pred_saver.astype(np.int64).sum() + precision = tp / (p + 1.e-6) + recall = tp / (t + 1.e-6) + return 2 * precision * recall / (precision + recall + 1.e-6) + + +class Auc(Metrics): + """doc""" + + def __init__(self, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + self.pred = pred + self.label = label + self.reset() + + def reset(self): + """doc""" + self.pred_saver = np.array([], dtype=np.float32) + self.label_saver = np.array([], dtype=np.bool) + + @property + def tensor(self): + """doc""" + return [self.pred, self.label] + + def update(self, args): + """doc""" + pred, label = args + pred = pred.reshape([-1]).astype(np.float32) + label = label.reshape([-1]).astype(np.bool) + self.pred_saver = np.concatenate([self.pred_saver, pred]) + self.label_saver = np.concatenate([self.label_saver, label]) + + def eval(self): + """doc""" + fpr, tpr, thresholds = sklearn.metrics.roc_curve(self.label_saver.astype(np.int64), self.pred_saver) + auc = sklearn.metrics.auc(fpr, tpr) + return auc + + +class RecallAtPrecision(Auc): + """doc""" + + def __init__(self, label, pred, precision=0.9): + """doc""" + super(RecallAtPrecision, self).__init__(label, pred) + self.precision = precision + + def eval(self): + """doc""" + self.pred_saver = self.pred_saver.reshape([self.label_saver.size, -1])[:, -1] + precision, recall, thresholds = sklearn.metrics.precision_recall_curve(self.label_saver, self.pred_saver) + for p, r in zip(precision, recall): + if p > self.precision: + return r + + +class PrecisionAtThreshold(Auc): + """doc""" + + def __init__(self, label, pred, threshold=0.5): + """doc""" + super().__init__(label, pred) + self.threshold = threshold + + def eval(self): + """doc""" + infered = self.pred_saver > self.threshold + correct_num = np.array(infered & self.label_saver).sum() + infer_num = infered.sum() + return correct_num / (infer_num + 1.e-6) + + +class Mrr(Metrics): + """doc""" + + def __init__(self, qid, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + self.qid = qid + self.label = label + self.pred = pred + self.reset() + + def reset(self): + """doc""" + self.qid_saver = np.array([], dtype=np.int64) + self.label_saver = 
np.array([], dtype=np.int64) + self.pred_saver = np.array([], dtype=np.float32) + + @property + def tensor(self): + """doc""" + return [self.qid, self.label, self.pred] + + def update(self, args): + """doc""" + qid, label, pred = args + if not (qid.shape[0] == label.shape[0] == pred.shape[0]): + raise ValueError( + 'Mrr dimention not match: qid[%s] label[%s], pred[%s]' % (qid.shape, label.shape, pred.shape)) + self.qid_saver = np.concatenate([self.qid_saver, qid.reshape([-1]).astype(np.int64)]) + self.label_saver = np.concatenate([self.label_saver, label.reshape([-1]).astype(np.int64)]) + self.pred_saver = np.concatenate([self.pred_saver, pred.reshape([-1]).astype(np.float32)]) + + def eval(self): + """doc""" + + def _key_func(tup): + return tup[0] + + def _calc_func(tup): + ranks = [ + 1. / (rank + 1.) for rank, (_, l, p) in enumerate(sorted(tup, key=lambda t: t[2], reverse=True)) + if l != 0 + ] + if len(ranks): + return ranks[0] + else: + return 0. + + mrr_for_qid = [ + _calc_func(tup) for _, tup in itertools.groupby( + sorted(zip(self.qid_saver, self.label_saver, self.pred_saver), key=_key_func), key=_key_func) + ] + mrr = np.float32(sum(mrr_for_qid) / len(mrr_for_qid)) + return mrr + + +class ChunkF1(Metrics): + """doc""" + + def __init__(self, label, pred, seqlen, num_label): + """doc""" + self.label = label + self.pred = pred + self.seqlen = seqlen + self.null_index = num_label - 1 + self.label_cnt = 0 + self.pred_cnt = 0 + self.correct_cnt = 0 + + def _extract_bio_chunk(self, seq): + chunks = [] + cur_chunk = None + + for index in range(len(seq)): + tag = seq[index] + tag_type = tag // 2 + tag_pos = tag % 2 + + if tag == self.null_index: + if cur_chunk is not None: + chunks.append(cur_chunk) + cur_chunk = None + continue + + if tag_pos == 0: + if cur_chunk is not None: + chunks.append(cur_chunk) + cur_chunk = {} + cur_chunk = {"st": index, "en": index + 1, "type": tag_type} + else: + if cur_chunk is None: + cur_chunk = {"st": index, "en": index + 1, "type": tag_type} + continue + + if cur_chunk["type"] == tag_type: + cur_chunk["en"] = index + 1 + else: + chunks.append(cur_chunk) + cur_chunk = {"st": index, "en": index + 1, "type": tag_type} + + if cur_chunk is not None: + chunks.append(cur_chunk) + return chunks + + def reset(self): + """doc""" + self.label_cnt = 0 + self.pred_cnt = 0 + self.correct_cnt = 0 + + @property + def tensor(self): + """doc""" + return [self.pred, self.label, self.seqlen] + + def update(self, args): + """doc""" + pred, label, seqlen = args + pred = pred.reshape([-1]).astype(np.int32).tolist() + label = label.reshape([-1]).astype(np.int32).tolist() + seqlen = seqlen.reshape([-1]).astype(np.int32).tolist() + + max_len = 0 + for l in seqlen: + max_len = max(max_len, l) + + for i in range(len(seqlen)): + seq_st = i * max_len + 1 + seq_en = seq_st + (seqlen[i] - 2) + pred_chunks = self._extract_bio_chunk(pred[seq_st:seq_en]) + label_chunks = self._extract_bio_chunk(label[seq_st:seq_en]) + self.pred_cnt += len(pred_chunks) + self.label_cnt += len(label_chunks) + + pred_index = 0 + label_index = 0 + while label_index < len(label_chunks) and pred_index < len(pred_chunks): + if pred_chunks[pred_index]['st'] < label_chunks[label_index]['st']: + pred_index += 1 + elif pred_chunks[pred_index]['st'] > label_chunks[label_index]['st']: + label_index += 1 + else: + if pred_chunks[pred_index]['en'] == label_chunks[label_index]['en'] \ + and pred_chunks[pred_index]['type'] == label_chunks[label_index]['type']: + self.correct_cnt += 1 + pred_index += 1 + label_index += 1 + + 
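+    # Note (added): `_extract_bio_chunk` above assumes a BIO label layout in which label 2*k is the
+    # begin (B) tag of entity type k, label 2*k + 1 is its inside (I) tag, and the last label index
+    # (num_label - 1) is the outside 'O' tag. `update()` also drops the first and last token of each
+    # sequence (seq_st = i * max_len + 1, length seqlen - 2), which assumes [CLS]/[SEP]-style
+    # boundary tokens at both ends of every example.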
def eval(self): + """doc""" + if self.pred_cnt == 0: + precision = 0.0 + else: + precision = 1.0 * self.correct_cnt / self.pred_cnt + + if self.label_cnt == 0: + recall = 0.0 + else: + recall = 1.0 * self.correct_cnt / self.label_cnt + + if self.correct_cnt == 0: + f1 = 0.0 + else: + f1 = 2 * precision * recall / (precision + recall) + + return np.float32(f1) + + +class PNRatio(Metrics): + """doc""" + + def __init__(self, qid, label, pred): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + self.qid = qid + self.label = label + self.pred = pred + self.saver = {} + + def reset(self): + """doc""" + self.saver = {} + + @property + def tensor(self): + """doc""" + return [self.qid, self.label, self.pred] + + def update(self, args): + """doc""" + qid, label, pred = args + if not (qid.shape[0] == label.shape[0] == pred.shape[0]): + raise ValueError('dimention not match: qid[%s] label[%s], pred[%s]' % (qid.shape, label.shape, pred.shape)) + qid = qid.reshape([-1]).tolist() + label = label.reshape([-1]).tolist() + pred = pred.reshape([-1]).tolist() + assert len(qid) == len(label) == len(pred) + for q, l, p in zip(qid, label, pred): + if q not in self.saver: + self.saver[q] = [] + self.saver[q].append((l, p)) + + def eval(self): + """doc""" + p = 0 + n = 0 + for qid, outputs in self.saver.items(): + for i in range(0, len(outputs)): + l1, p1 = outputs[i] + for j in range(i + 1, len(outputs)): + l2, p2 = outputs[j] + if l1 > l2: + if p1 > p2: + p += 1 + elif p1 < p2: + n += 1 + elif l1 < l2: + if p1 < p2: + p += 1 + elif p1 > p2: + n += 1 + pn = p / n if n > 0 else 0.0 + return np.float32(pn) + + +class BinaryPNRatio(PNRatio): + """doc""" + + def __init__(self, qid, label, pred): + """doc""" + super(BinaryPNRatio, self).__init__(qid, label, pred) + + def eval(self): + """doc""" + p = 0 + n = 0 + for qid, outputs in self.saver.items(): + pos_set = [] + neg_set = [] + for label, score in outputs: + if label == 1: + pos_set.append(score) + else: + neg_set.append(score) + + for ps in pos_set: + for ns in neg_set: + if ps > ns: + p += 1 + elif ps < ns: + n += 1 + else: + continue + pn = p / n if n > 0 else 0.0 + return np.float32(pn) + + +class PrecisionAtK(Metrics): + """doc""" + + def __init__(self, qid, label, pred, k=1): + """doc""" + if label.shape != pred.shape: + raise ValueError( + 'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s' % (repr(label), repr(pred))) + + self.qid = qid + self.label = label + self.pred = pred + self.k = k + self.saver = {} + + def reset(self): + """doc""" + self.saver = {} + + @property + def tensor(self): + """doc""" + return [self.qid, self.label, self.pred] + + def update(self, args): + """doc""" + qid, label, pred = args + if not (qid.shape[0] == label.shape[0] == pred.shape[0]): + raise ValueError('dimention not match: qid[%s] label[%s], pred[%s]' % (qid.shape, label.shape, pred.shape)) + qid = qid.reshape([-1]).tolist() + label = label.reshape([-1]).tolist() + pred = pred.reshape([-1]).tolist() + + assert len(qid) == len(label) == len(pred) + for q, l, p in zip(qid, label, pred): + if q not in self.saver: + self.saver[q] = [] + self.saver[q].append((l, p)) + + def eval(self): + """doc""" + right = 0 + total = 0 + for v in self.saver.values(): + v = sorted(v, key=lambda x: x[1], reverse=True) + k = min(self.k, len(v)) + for i in range(k): + if v[i][0] == 1: + right += 1 + break + total += 1 + + return np.float32(1.0 * right / 
total) + + +#class SemanticRecallMetrics(Metrics): +# def __init__(self, qid, vec, type_id): +# self.qid = qid +# self.vec = vec +# self.type_id = type_id +# self.reset() +# +# def reset(self): +# self.saver = [] +# +# @property +# def tensor(self): +# return [self.qid, self.vec, self.type_id] +# +# def update(self, args): +# qid, vec, type_id = args +# self.saver.append((qid, vec, type_id)) +# +# def eval(self): +# dic = {} +# for qid, vec, type_id in self.saver(): +# dic.setdefault(i, {}).setdefault(k, []).append(vec) +# +# for qid in dic: +# assert len(dic[qid]) == 3 +# qvec = np.arrray(dic[qid][0]) +# assert len(qvec) == 1 +# ptvec = np.array(dic[qid][1]) +# ntvec = np.array(dic[qid][2]) +# +# np.matmul(qvec, np.transpose(ptvec)) +# np.matmul(qvec, np.transpose(ntvec)) +# diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/train/monitored_executor.py b/modules/text/text_generation/ernie_gen/propeller/paddle/train/monitored_executor.py new file mode 100644 index 00000000..ab0af294 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/train/monitored_executor.py @@ -0,0 +1,434 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +doc +""" + +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import os +import sys +import json +from functools import reduce +import six +from time import time +import shutil + +import logging +import numpy as np +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen.propeller import util +from ernie_gen.propeller.types import StopException, ProgramPair, WarmStartSetting, TextoneWarmStartSetting +from ernie_gen.propeller.paddle.train import hooks +from . 
import distribution + +log = logging.getLogger(__name__) + +__all__ = ['MonitoredExecutor', 'Saver'] + + +def _get_one_place(): + return F.cuda_places()[0] if F.core.is_compiled_with_cuda() else F.cpu_places()[0] + + +class RunState(object): + """serializable Run state object""" + + @classmethod + def from_dict(cls, d): + d['step'] = 0 + r = RunState() + r.__dict__ = d + return r + + @classmethod + def from_str(cls, s): + """doc""" + j = json.loads(s) + return cls.from_dict(j) + + def __init__(self): + """doc""" + self.__dict__ = {'gstep': 0, 'step': 0, 'time': time()} + + @property + def gstep(self): + """doc""" + return self.__dict__.get('gstep', self.__dict__.get('global_step')) # backward compatibility + + @property + def step(self): + """doc""" + return self.__dict__['step'] + + def __setitem__(self, k, v): + self.__dict__[k] = v + + def __getitem__(self, k): + return self.__dict__.get(k, None) + + @property + def time(self): + """doc""" + return self.__dict__['time'] + + def state_dict(self): + return self.__dict__ + + def __repr__(self): + """doc""" + return repr(self.state_dict()) + + def serialize(self): + """doc""" + return json.dumps(self.state_dict()) + + def next(self): + """doc""" + newd = dict(self.__dict__, gstep=self.gstep + 1, step=self.step + 1, time=time()) + ret = RunState() + ret.__dict__ = newd + return ret + + +class Saver(object): + """checkpoint saver and manager""" + + def __init__(self, save_dir, exe, program, save_prefix='model', max_ckpt_to_keep=None): + """doc""" + assert isinstance(exe, F.Executor), 'expect normal executor to save, got executor of type %s' % repr(type(exe)) + self._exe = exe + self._program = program + self._save_dir = save_dir + self._save_prefix = save_prefix + self._max_ckpt_to_keep = 10 if max_ckpt_to_keep is None else max_ckpt_to_keep + + self.ckpt_info_path = os.path.join(save_dir, 'ckpt_info') + + if os.path.exists(self.ckpt_info_path): + self.ckpt_list = [p.strip() for p in open(self.ckpt_info_path).readlines()] + log.debug('ckpt_list in this Saver: %s' % (self.ckpt_list)) + else: + self.ckpt_list = [] + + @property + def last_ckpt(self): + """doc""" + return self.ckpt_list[-1] if len(self.ckpt_list) else None + + def _save_program(self, dir): + F.io.save_persistables(self._exe, dir, self._program.train_program) + + def _load_program(self, dir, predicate_fn=None): + if predicate_fn is None: + + def _fn(v): + vpath = os.path.join(dir, v.name) + if F.io.is_persistable(v): + if os.path.exists(vpath): + return True + else: + log.warning('var %s not found in checkpoint, ignored' % v.name) + return False + + predicate_fn = _fn + try: + F.io.load_vars(self._exe, dir, main_program=self._program.train_program, predicate=predicate_fn) + except F.core.EnforceNotMet as e: + log.exception(e) + raise RuntimeError('can not load model from %s, is this a textone checkpoint?' 
% dir) + + def save(self, state): + """doc""" + save_name = '%s_%d' % (self._save_prefix, state.gstep) + save_dir = os.path.join(self._save_dir, save_name) + tmp_dir = os.path.join(self._save_dir, 'tmp') + try: + shutil.rmtree(save_dir) + shutil.rmtree(tmp_dir) + except OSError: + pass + log.debug('saving step %d to %s' % (state.gstep, save_dir)) + self._save_program(tmp_dir) + shutil.move(tmp_dir, save_dir) + meta = state.serialize() + open(os.path.join(save_dir, 'meta'), 'w').write(meta) + + self.ckpt_list.append(save_name) + if len(self.ckpt_list) > self._max_ckpt_to_keep: + ckpt_to_keep = self.ckpt_list[-self._max_ckpt_to_keep:] + ckpt_to_remove = set(self.ckpt_list) - set(ckpt_to_keep) + self.ckpt_list = ckpt_to_keep + for ckpt in ckpt_to_remove: + ckpt_dir = os.path.join(self._save_dir, ckpt) + if os.path.exists(ckpt_dir): + shutil.rmtree(ckpt_dir) + log.debug('No. of ckpt exceed %d, clean up: %s' % (self._max_ckpt_to_keep, ckpt_dir)) + open(self.ckpt_info_path, 'w').write('\n'.join(self.ckpt_list)) + + def restore(self, ckpt=-1): + """doc""" + if isinstance(ckpt, int): + try: + path = os.path.join(self._save_dir, self.ckpt_list[ckpt]) + except IndexError: + raise ValueError('invalid restore ckpt number %d' % ckpt) + elif isinstance(ckpt, six.string_types): + if not os.path.exists(ckpt): + raise ValueError('ckpt: %s not found' % ckpt) + path = ckpt + else: + raise ValueError('ckpt type not understood %s' % repr(ckpt)) + + meta_file = os.path.join(path, 'meta') + if not os.path.exists(meta_file): + raise RuntimeError('meta not found in restore dir: %s' % path) + state = RunState.from_str(open(meta_file).read()) + log.info('restore from ckpt %s, ckpt-status: %s' % (path, repr(state))) + + self._load_program(path) + return state + + +class SaverV2(Saver): + def _save_program(self, dir): + save_path = os.path.join(dir, 'ckpt') + F.save(self._program.train_program, save_path) + + def _load_program(self, dir, predicate_fn=None): + try: + save_path = os.path.join(dir, 'ckpt') + F.load( + self._program.train_program, + save_path, + ) + except F.core.EnforceNotMet as e: + log.exception(e) + raise RuntimeError('can not load model from %s, is this a textone checkpoint?' 
% dir) + + +TextoneTrainer = None + + +class MonitoredExecutor(object): + """An Executor wrapper handling the train loop""" + saver_class = SaverV2 # will change if textone enabled + + def __init__( + self, + executor, + program, + loss=None, #must set in train + state=None, + run_config=None, #none if not load + run_hooks=[], + warm_start_setting=None): + if not isinstance(executor, F.Executor): + raise ValueError('PE is no longer supported') + if isinstance(executor, F.ParallelExecutor): + raise ValueError('ParallelExecutor is deprecatd, use Executor') + if not isinstance(program, ProgramPair): + raise ValueError('Expect ProgramPair, got %r' % type(program)) + self._exe = executor + self._hooks = run_hooks + self._state = RunState() # might be overwrite in freeze + self._program = program + self._loss = loss + self._warm_start_setting = warm_start_setting + self._saver = None # will set in prepare + self.result = None # will set after train + if run_config is not None: + self._model_dir = run_config.model_dir + self._save_dir = run_config.model_dir + self._save_steps = run_config.save_steps + self._skip_steps = run_config.skip_steps if run_config.skip_steps else 100 + self._save_prefix = 'model' + self._max_ckpt = run_config.max_ckpt + + @property + def state(self): + """doc""" + return self._state + + def init_or_restore_variables(self, ckpt=-1): + """ + init vars or restore vars from model_dir + call before train + """ + # The order of this 2 steps really matters + # 1. init train + + F.Executor(_get_one_place()).run(self._program.startup_program) + # 2. restore param + + self._saver = self.saver_class( + self._model_dir, F.Executor(_get_one_place()), program=self._program, max_ckpt_to_keep=self._max_ckpt) + + if self._warm_start_setting is not None: + if not os.path.exists(self._warm_start_setting.from_dir): + raise ValueError('warm start dir not exists: %s' % self._warm_start_setting.from_dir) + + if isinstance(self._warm_start_setting, WarmStartSetting): + log.info("warm start from %s" % self._warm_start_setting.from_dir) + log.info(self._saver) + if (not type(self._saver) is Saver) and (not type(self._saver) is SaverV2): + raise ValueError('try to warm start from standart dir, but textone enabled') + if self._warm_start_setting.predicate_fn is not None: + + def _fn(v): + ret = self._warm_start_setting.predicate_fn(v) + if ret: + log.info('warm start: %s' % v.name) + return ret + + try: + F.io.load_vars( + self._exe, + self._warm_start_setting.from_dir, + main_program=self._program.train_program, + predicate=_fn) + except F.core.EnforceNotMet as e: + log.exception(e) + raise RuntimeError('can not load model from %s, is this a textone checkpoint?' % dir) + else: + raise NotImplementedError() + elif isinstance(self._warm_start_setting, TextoneWarmStartSetting): + if not type(self._saver) is TextoneTrainer: + raise ValueError('try to warm start from textone pretrain dir, but textone not enabled') + log.info("[texone] warm start from %s" % self._warm_start_setting.from_dir) + self._saver._load_pretrained(self._warm_start_setting.from_dir) + else: + raise ValueError('expect _warm_start_setting to be TextoneWarmStartSetting of WarmStartSetting, got %s' + % repr(self._warm_start_setting)) + + if self._saver.last_ckpt is not None: + self._state = self._saver.restore(ckpt) + + def _freeze(self): + """ + call before enter train loop + convert program to compiled program + will do nothing if loss is None i.e. 
not in train mode + """ + if self._loss is None: + log.debug('will not freeze a program without loss') + return + if isinstance(self._program.train_program, F.compiler.CompiledProgram): + log.debug('program has already been built') + return + exec_strategy = F.ExecutionStrategy() + exec_strategy.num_threads = 4 #2 for fp32 4 for fp16 + exec_strategy.use_experimental_executor = True + exec_strategy.num_iteration_per_drop_scope = 10 #important shit + + build_strategy = F.BuildStrategy() + build_strategy.remove_unnecessary_lock = False + #build_strategy.fuse_broadcast_ops = True + build_strategy.num_trainers = distribution.status.num_replica + build_strategy.trainer_id = distribution.status.replica_id + build_strategy.memory_optimize = True + + log.info('replica id %d of %d' % (distribution.status.replica_id, distribution.status.num_replica)) + + program = F.CompiledProgram(self._program.train_program).with_data_parallel( + loss_name=self._loss.name, build_strategy=build_strategy, exec_strategy=exec_strategy) + self._program = ProgramPair(train_program=program, startup_program=self._program.startup_program) + + def __enter__(self): + """ + prepapre before enter train loop + """ + if F.core.is_compiled_with_cuda(): + log.info('propeller runs in CUDA mode') + else: + log.info('propeller runs in CPU mode') + + #log.debug('freezing program') + self._freeze() + #log.debug('done freezing') + log.info('********** Start Loop ************') + # TODO init + + self.result = None + for h in self._hooks: + log.debug('train loop has hook %s' % h) + h.before_train(self._program) + return self + + def run(self, fetch_list=[], *args, **kwargs): + """ + wrapper for Executor.run + """ + #log.debug('Executor running step %d' % self._state.gstep) + if self._hooks: + fetch_list = [fetch_list] + for h in self._hooks: + #log.debug('calling hook.before_run %s' % h) + fetch = h.before_run(self._state) + fetch_list.append(fetch) + fetch_list_len = map(len, fetch_list) + fetch_list, schema = util.flatten(fetch_list) + fetch_list = [f.name if not isinstance(f, six.string_types) else f for f in fetch_list] + #if len(set(fetch_list)) != len(fetch_list): + # log.error('strange shit happend when fetch list has idetity tensors %s' % fetch_list) + #log.debug(fetch_list) + res = self._exe.run(self._program.train_program, fetch_list=fetch_list, *args, **kwargs) + res = [self._merge_result(r) for r in res] + #log.debug(res) + + res = util.unflatten(res, schema) + ret, res = res[0], res[1:] + for r, h in zip(res, self._hooks): + #log.debug('calling hook.after_run') + h.after_run(r, self._state) + + if any(map(lambda i: i.should_stop(self._state), self._hooks)): + raise StopException('hook call stop') + else: + ret = self._exe.run(self._program.train_program, fetch_list=fetch_list, *args, **kwargs) + self._state = self._state.next() + return ret + + def __exit__(self, err_type, err_value, trace): + """ + clean up things and report hook result when exit train loop + """ + if (err_type is None) or isinstance(err_value, (F.core.EOFException, StopException, KeyboardInterrupt)): + try: + log.info('********** Stop Loop ************') + self.result = [] + for h in self._hooks: + self.result.append(h.after_train()) + except Exception as e: + log.exception('error occur after loop %s' % repr(e)) + else: + log.info('********** Interupt Loop ************') + log.exception('error occur during loop %s: %s' % (err_type, err_value)) + + def _merge_result(self, ls): + """ + merge results from multi gpu cards + """ + dev_count = 
len(self._program.train_program._places) if isinstance(self._program.train_program, + F.compiler.CompiledProgram) else 1 + if dev_count == 1: + return ls + else: + shape = (-1, ls.shape[0] // dev_count) + ls.shape[1:] + ret = np.reshape(ls, shape).mean(axis=0) + return ret diff --git a/modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py b/modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py new file mode 100644 index 00000000..1851e0ce --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py @@ -0,0 +1,466 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""common ML train and eval procedure""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import os +import itertools +import six +import inspect +from collections import namedtuple +from contextlib import contextmanager +from six.moves import zip, map +import logging +from time import time + +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen.propeller.types import RunMode, StopException, SummaryRecord, StopException +from ernie_gen.propeller.types import ModelSpec, InferenceSpec, ProgramPair, RunConfig +from ernie_gen.propeller.paddle import summary, collection +from ernie_gen.propeller.paddle.data.functional import Dataset +from ernie_gen.propeller.paddle.train import distribution +from ernie_gen.propeller.train.model import Model +from ernie_gen.propeller.paddle.train.monitored_executor import Saver +from ernie_gen.propeller.paddle.train import hooks, metrics + +from ernie_gen.propeller.paddle.train.monitored_executor import MonitoredExecutor + +log = logging.getLogger(__name__) + +__all__ = ['train_and_eval', 'Learner'] + + +def _get_summary_writer(path): + summary_writer = None + try: + from visualdl import LogWriter + if distribution.status.is_master: + summary_writer = LogWriter(os.path.join(path)) + except ImportError: + log.warning('VisualDL not installed, will not log to VisualDL') + return summary_writer + + +def _get_one_place(): + return F.cuda_places()[0] if F.core.is_compiled_with_cuda() else F.cpu_places()[0] + + +def _log_eval_result(name, eval_result, swriter, state): + log.debug(eval_result) + printable = [] + for n, val in six.iteritems(eval_result): + assert val.shape == (), 'metrics eval use float' + printable.append('{}\t{}'.format(n, val)) + if swriter is not None: + swriter.add_scalar(n, val, state.gstep) + log.debug('write to VisualDL %s' % swriter.logdir) + + if len(printable): + log.info('*** eval res: %10s ***' % name) + for p in printable: + log.info(p) + log.info('******************************') + + +def _build_net(model_fn, features, mode, params, run_config): + model_spec = model_fn(features=features, mode=mode, params=params, run_config=run_config) + + if mode == RunMode.TRAIN: + if not isinstance(model_spec.loss, F.framework.Variable): + raise 
ValueError('model_spec.metrics should be Variable, got %s' % repr(model_spec.loss)) + if not (model_spec.loss.shape == () or model_spec.loss.shape == (1, )): + raise ValueError('expect scarlar loss, got %s' % repr(model_spec.loss.shape)) + #model_spec.loss.persistable = True + elif mode == RunMode.EVAL: + if not isinstance(model_spec.metrics, dict): + raise ValueError('model_spec.metrics should be dict, got %s' % repr(model_spec.metrics)) + elif mode == RunMode.PREDICT: + if not isinstance(model_spec.predictions, (list, tuple)): + raise ValueError('model_spec.predictions shuold be list, got %s' % repr(model_spec.predictions)) + else: + raise ValueError('unkonw mode %s' % mode) + return model_spec + + +class Learner(object): + """A Learner can train / eval / predict on a Dataset""" + + def __init__(self, model_class_or_model_fn, run_config, params=None, warm_start_setting=None): + """ + model_class_or_model_fn(callable|propeller.train.Model): `model_class_or_model_fn` be specified in 2 ways: + 1. subclass of propeller.train.Model which implements: + 1. \_\_init\_\_ (hyper_param, mode, run_config) + 2. forward (features) => (prediction) + 3. backword (loss) => None + 4. loss (predictoin) => (loss) + 5. metrics (optional) (prediction) => (dict of propeller.Metrics) + + 2. a model_fn takes following args: + 1. features + 2. param + 3. mode + 4. run_config(optional) + and returns a `propeller.ModelSpec` + + params: any python object, will pass to your `model_fn` or `propeller.train.Model` + run_config (propeller.RunConfig): run_config.max_steps should not be None. + warm_start_setting (propeller.WarmStartSetting): Optional. warm start variable will overwrite model variable. + """ + if run_config.model_dir is None: + raise ValueError('model_dir should specified in run_config') + + if inspect.isfunction(model_class_or_model_fn): + _model_fn = model_class_or_model_fn + elif issubclass(model_class_or_model_fn, Model): + _model_fn = _build_model_fn(model_class_or_model_fn) + else: + raise ValueError('unknown model %s' % model_class_or_model_fn) + + self.model_fn = _model_fn + self.params = params + self.run_config = run_config + self.warm_start_setting = warm_start_setting + + def _build_for_train(self, train_dataset): + train_dataset.name = 'train' + train_program = F.Program() + startup_prog = F.Program() + with F.program_guard(train_program, startup_prog): + with collection.Collections() as collections: + log.info('Building Train Graph...') + fea = train_dataset.features() + model_spec = _build_net(self.model_fn, fea, RunMode.TRAIN, self.params, self.run_config) + log.info('Building Train Graph: Done') + + scalars = collections.get(collection.Key.SUMMARY_SCALAR) + histograms = collections.get(collection.Key.SUMMARY_HISTOGRAM) + skip_optimize_ops = collections.get(collection.Key.SKIP_OPTIMIZE) + skip_opt = set() + if skip_optimize_ops is not None: + skip_opt |= set(skip_optimize_ops) + if scalars is not None: + skip_opt |= {t for _, t in scalars} + if histograms is not None: + skip_opt |= {t for _, t in histograms} + skip_opt = list(skip_opt) + log.info('Train with: \n> Run_config: %s\n> Params: %s\n> Train_model_spec: %s\n' % (repr( + self.run_config), repr(self.params), repr(model_spec))) + + summary_record = SummaryRecord( + scalar=collections.get(collection.Key.SUMMARY_SCALAR), + histogram=collections.get(collection.Key.SUMMARY_HISTOGRAM), + ) + return ProgramPair(train_program=train_program, startup_program=startup_prog), model_spec, summary_record + + def _build_for_eval(self, ds): + ds.name 
= 'eval' + program = F.Program() + startup_prog = F.Program() + with F.program_guard(program, startup_prog): + #share var with Train net + log.info('Building Eval Graph') + fea = ds.features() + model_spec = _build_net(self.model_fn, fea, RunMode.EVAL, self.params, self.run_config) + log.info('Done') + #program = program.clone(for_test=True) + log.info('Eval with: \n> Run_config: %s\n> Params: %s\n> Train_model_spec: %s\n' % (repr( + self.run_config), repr(self.params), repr(model_spec))) + return ProgramPair(train_program=program, startup_program=startup_prog), model_spec + + def _build_for_predict(self, ds): + ds.name = 'predict' + program = F.Program() + startup_prog = F.Program() + with F.program_guard(program, startup_prog): + #share var with Train net + log.info('Building Predict Graph') + fea = ds.features() + model_spec = _build_net(self.model_fn, fea, RunMode.PREDICT, self.params, self.run_config) + log.info('Done') + + #program = program.clone(for_test=True) + + log.info('Predict with: \n> Run_config: %s\n> Params: %s\n> Train_model_spec: %s\n' % (repr( + self.run_config), repr(self.params), repr(model_spec))) + return ProgramPair(train_program=program, startup_program=startup_prog), model_spec + + def train(self, train_ds, train_hooks=[]): + """train on a `Dataset`""" + if not isinstance(train_ds, Dataset): + raise ValueError('expect dataset to be instance of Dataset, got %s' % repr(train_ds)) + + train_program, model_spec, summary_record = self._build_for_train(train_ds) + train_run_hooks = [ + hooks.StopAtStepHook(self.run_config.max_steps, self.run_config.run_steps), + hooks.LoggingHook( + model_spec.loss, + summary_record=summary_record, + summary_writer=_get_summary_writer(os.path.join(self.run_config.model_dir, 'train_history')), + per_step=self.run_config.log_steps, + skip_step=self.run_config.skip_steps), + ] + if model_spec.train_hooks is not None: + train_run_hooks.extend(model_spec.train_hooks) + train_run_hooks.extend(train_hooks) + + train_executor = F.Executor(_get_one_place()) + + mon_exe = MonitoredExecutor( + train_executor, + train_program, + loss=model_spec.loss, + run_config=self.run_config, + run_hooks=train_run_hooks, + warm_start_setting=self.warm_start_setting) + + distribution.init_distribuition_env(train_program) #only initialize distribute training with + mon_exe.init_or_restore_variables() + if distribution.status.is_master: + mon_exe._hooks.append( + hooks.CheckpointSaverHook(mon_exe._saver, per_step=mon_exe._save_steps, skip_step=mon_exe._skip_steps)) + + try: + with mon_exe: + for data in train_ds.start(): + mon_exe.run(feed=data) + except (StopException, F.core.EOFException) as e: + pass + + return mon_exe.result + + def evaluate(self, eval_dataset, eval_hooks=[]): + """eval on a `Dataset`""" + if not isinstance(eval_dataset, Dataset): + raise ValueError('expect dataset to be instance of Dataset, got %s' % repr(eval_dataset)) + program, model_spec = self._build_for_eval(eval_dataset) + single_card_place = _get_one_place() + eval_executor = F.Executor(single_card_place) + + eval_run_hooks = [ + hooks.StopAtStepHook(self.run_config.eval_max_steps, self.run_config.eval_max_steps), + hooks.EvalHook(model_spec.metrics, ) + ] + + if model_spec.eval_hooks is not None: + eval_run_hooks.extend(model_spec.eval_hooks) + eval_run_hooks.extend(eval_hooks) + + mon_exe = MonitoredExecutor(eval_executor, program, run_config=self.run_config, run_hooks=eval_run_hooks) + mon_exe.init_or_restore_variables() + + try: + with mon_exe: + for data in 
eval_dataset.start(places=[single_card_place]): + mon_exe.run(feed=data) + except (StopException, F.core.EOFException) as e: + pass + + _, eval_result = mon_exe.result + + summary_writer = _get_summary_writer(os.path.join(self.run_config.model_dir, 'eval_history')) + _log_eval_result('eval', eval_result, summary_writer, mon_exe.state) + + return mon_exe.result + + def predict(self, predict_dataset, ckpt=-1, ckpt_path=None, steps=-1, split_batch=True): + """ + Perform predictoin + will call `model_fn` and initiate user-specifed model in `propeller.RunMode.PREDICT` mode + + Args: + infer_dataset (propeller.data.Dataset): should not `shuffle` or `repeat` + steps (int): steps to predict, if None is specifed, + will stop when `StopException` is raised in `infer_dataset` + ckpt_path (None|str): Path of a specific checkpoint to predict. + If None, the latest checkpoint in model_dir is used. + If there are no checkpoints in model_dir, + prediction is run with newly initialized Variables instead of ones restored from checkpoint. + ckpt (int): deprecated args + split_batch (bool): if True, prediction of each example in a batch is returned. + + Yields: + Evaluated values of predictions tensors. + + """ + if not isinstance(predict_dataset, Dataset): + raise ValueError('expect dataset to be instance of Dataset, got %s' % repr(predict_dataset)) + + program, model_spec = self._build_for_predict(predict_dataset) + single_card_place = _get_one_place() + executor = F.Executor(single_card_place) + pred_run_config = RunConfig(run_steps=steps if steps == -1 else None, model_dir=self.run_config.model_dir) + mon_exe = MonitoredExecutor( + executor, + program, + run_config=pred_run_config, + warm_start_setting=self.warm_start_setting, + ) + mon_exe.init_or_restore_variables(ckpt if ckpt_path is None else ckpt_path) + try: + with mon_exe: + log.info('Runining predict from dir: %s' % repr(mon_exe.state)) + single_card_place = _get_one_place() + for data in predict_dataset.start(places=[single_card_place]): + res = mon_exe.run(fetch_list=model_spec.predictions, feed=data) + if split_batch: + res = map(lambda i: i.tolist(), res) + res = zip(*res) # transpose + for r in res: + yield r + else: + yield list(map(lambda i: i.tolist(), res)) + except (StopException, F.core.EOFException) as e: + pass + + +def train_and_eval(_placeholder=None, + model_class_or_model_fn=None, + params=None, + run_config=None, + train_dataset=None, + eval_dataset=None, + warm_start_setting=None, + train_hooks=[], + eval_hooks=[], + exporters=[]): + """ + Perform train and evaluate procesure. + will call `model_fn` and initiate user-specifed model in `propeller.RunMode.PREDICT` mode + + Args: + model_class_or_model_fn(callable|propeller.train.Model): `model_class_or_model_fn` be specified in 2 ways: + 1. subclass of propeller.train.Model + 2. a model_fn takes following args: 1. features; 2. param; 3. mode; 4. run_config(optional) + and returns a `propeller.ModelSpec` + + params: any python object, will pass to your `model_fn` or `propeller.train.Model` + run_config (propeller.RunConfig): run_config.max_steps should not be None. + train_dataset (propeller.paddle.data.Dataset): training will stop if global_step > run_config.max_steps. + eval_dataset (propeller.paddle.data.Dataset|dict): Optional, if Dict of propeller.data.Dataset were specified, + will perform evluatation on every evaluation sets and report results. + warm_start_setting (propeller.WarmStartSetting): Optional. warm start variable will overwrite model variable. 
+ train_hooks (list of propeller.paddle.train.RunHook): Optional. + eval_hooks (list of propeller.paddle.train.RunHook): Optional. + exporters (list of propeller.paddle.train.Exporter): Optional. + """ + if _placeholder is not None: + raise ValueError('specify keyword args to this function') + if model_class_or_model_fn is None or params is None or run_config is None or train_dataset is None: + raise ValueError('some argument is None: model_class_or_model_fn:%s params:%s run_config:%s train_dataset:%s' % + (model_class_or_model_fn, params, run_config, train_dataset)) + + #init distribution env if envvir PROPELLER_DISCONFIG is set + if train_dataset is None: + raise ValueError('train dataset not specified') + + if eval_dataset is None: + raise ValueError('eval dataset not specifed') + + if not isinstance(eval_dataset, (dict, Dataset)): + raise ValueError('Eval dataset should be propeller.Dataset of a list of that, got: %s' % eval_dataset) + if isinstance(eval_dataset, Dataset): + eval_dataset = {'eval': eval_dataset} + ds_list = list(eval_dataset.values()) + for ds in ds_list: + ds.name = 'eval' + first = ds_list[0] + for d in ds_list[1:]: + if not first.__eq__(d): + raise ValueError('eval dataset has different output_shapes or types: %s' % repr(ds_list)) + + est = Learner(model_class_or_model_fn, run_config, params, warm_start_setting=warm_start_setting) + + class _EvalHookOnTrainLoop(hooks.RunHook): + def __init__(self): + self.program, self.model_spec = est._build_for_eval(list( + eval_dataset.values())[0]) #eval_datasets must have same output shapes + self.summary_writers = { + ds_name: _get_summary_writer(os.path.join(os.path.join(run_config.model_dir, 'eval_history'), ds_name)) + for ds_name in eval_dataset + } + + def after_run(self, _, state): + """doc""" + if state.step > run_config.skip_steps and state.gstep % run_config.eval_steps == 0: + eval_results = {} + for name, ds in six.iteritems(eval_dataset): + ehooks = [ + hooks.StopAtStepHook(est.run_config.eval_max_steps, est.run_config.eval_max_steps), + hooks.EvalHook( + self.model_spec.metrics, + summary_writer=self.summary_writers[name], + ) + ] + single_card_place = _get_one_place() + eval_executor = F.Executor(single_card_place) + mon_exe = MonitoredExecutor( + eval_executor, self.program, run_config=est.run_config, run_hooks=ehooks + eval_hooks) + try: + with mon_exe: + for data in ds.start(places=[single_card_place]): + mon_exe.run(feed=data) + except (StopException, F.core.EOFException) as e: + pass + hook_results = mon_exe.result + eval_res = hook_results[1] # hook_results: [StopAtStepHook, EvalHook, ...] 
+ eval_results[name] = eval_res + _log_eval_result(name, eval_res, self.summary_writers[name], state) + for exporter in exporters: + exporter.export(eval_executor, self.program, self.model_spec, eval_results, state) + else: + eval_results = {} + return eval_results + + if distribution.status.is_master: + train_hooks.append(_EvalHookOnTrainLoop()) + res = est.train(train_dataset, train_hooks=train_hooks) + return res + + +def _build_model_fn(model_class): + def _model_fn(features, mode, params, run_config): + if mode != RunMode.PREDICT: + fea, label = features[:-1], features[-1] + else: + fea = features + + model = model_class(params, mode, run_config=run_config) + pred = model.forward(fea) + if isinstance(pred, F.framework.Variable): + prediction = [pred] + else: + prediction = pred + if mode == RunMode.TRAIN: + loss = model.loss(pred, label) + model.backward(loss) + return ModelSpec(loss=loss, predictions=prediction, mode=mode) + elif mode == RunMode.EVAL: + loss = model.loss(pred, label) + me = model.metrics(pred, label) + + inf_spec = InferenceSpec(inputs=fea, outputs=prediction) + if 'loss' not in me: + me['loss'] = metrics.Mean(loss) + return ModelSpec(loss=loss, predictions=prediction, metrics=me, mode=mode, inference_spec=inf_spec) + elif mode == RunMode.PREDICT: + inf_spec = InferenceSpec(inputs=fea, outputs=prediction) + return ModelSpec(predictions=prediction, mode=mode, inference_spec=inf_spec) + else: + raise RuntimeError('unknown run mode %s' % mode) + + return _model_fn diff --git a/modules/text/text_generation/ernie_gen/propeller/service/__init__.py b/modules/text/text_generation/ernie_gen/propeller/service/__init__.py new file mode 100644 index 00000000..d9051653 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/service/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""server""" diff --git a/modules/text/text_generation/ernie_gen/propeller/service/client.py b/modules/text/text_generation/ernie_gen/propeller/service/client.py new file mode 100644 index 00000000..827541fa --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/service/client.py @@ -0,0 +1,101 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +import asyncio +import threading +import math + +import zmq +import zmq.asyncio +import numpy as np + +from ernie_gen.propeller import log +import ernie_gen.propeller.service.utils as serv_utils + + +class InferenceBaseClient(object): + def __init__(self, address): + self.context = zmq.Context() + self.address = address + self.socket = self.context.socket(zmq.REQ) + self.socket.connect(address) + log.info("Connecting to server... %s" % address) + + def __call__(self, *args): + for arg in args: + if not isinstance(arg, np.ndarray): + raise ValueError('expect ndarray slot data, got %s' % repr(arg)) + request = serv_utils.nparray_list_serialize(args) + + self.socket.send(request) + reply = self.socket.recv() + ret = serv_utils.nparray_list_deserialize(reply) + return ret + + +class InferenceClient(InferenceBaseClient): + def __init__(self, address, batch_size=128, num_coroutine=10, timeout=10.): + self.loop = asyncio.new_event_loop() + asyncio.set_event_loop(self.loop) + context = zmq.asyncio.Context() + self.socket_pool = [context.socket(zmq.REQ) for _ in range(num_coroutine)] + log.info("Connecting to server... %s" % address) + for socket in self.socket_pool: + socket.connect(address) + self.num_coroutine = num_coroutine + self.batch_size = batch_size + self.timeout = int(timeout * 1000) + + #yapf: disable + def __call__(self, *args): + for arg in args: + if not isinstance(arg, np.ndarray): + raise ValueError('expect ndarray slot data, got %s' % + repr(arg)) + + num_tasks = math.ceil(1. * args[0].shape[0] / self.batch_size) + rets = [None] * num_tasks + + async def get(coroutine_idx=0, num_coroutine=1): + socket = self.socket_pool[coroutine_idx] + while coroutine_idx < num_tasks: + begin = coroutine_idx * self.batch_size + end = (coroutine_idx + 1) * self.batch_size + + arr_list = [arg[begin:end] for arg in args] + request = serv_utils.nparray_list_serialize(arr_list) + try: + await socket.send(request) + await socket.poll(self.timeout, zmq.POLLIN) + reply = await socket.recv(zmq.NOBLOCK) + ret = serv_utils.nparray_list_deserialize(reply) + except Exception as e: + log.exception(e) + ret = None + rets[coroutine_idx] = ret + coroutine_idx += num_coroutine + + futures = [ + get(i, self.num_coroutine) for i in range(self.num_coroutine) + ] + self.loop.run_until_complete(asyncio.wait(futures)) + for r in rets: + if r is None: + raise RuntimeError('Client call failed') + return [np.concatenate(col, 0) for col in zip(*rets)] + #yapf: enable diff --git a/modules/text/text_generation/ernie_gen/propeller/service/interface.proto b/modules/text/text_generation/ernie_gen/propeller/service/interface.proto new file mode 100644 index 00000000..e94894c2 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/service/interface.proto @@ -0,0 +1,46 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package interface; + +service Inference { + rpc Infer(Slots) returns (Slots){} +} + +message Slots { + repeated Slot slots = 1; +} + +message Slot { + enum Type { + // Pod Types + BOOL = 0; + INT16 = 1; + INT32 = 2; + INT64 = 3; + FP16 = 4; + FP32 = 5; + FP64 = 6; + // Tensor is used in C++. + SIZE_T = 19; + UINT8 = 20; + INT8 = 21; + } + + Type type = 1; + repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] + bytes data = 3; +} + diff --git a/modules/text/text_generation/ernie_gen/propeller/service/interface_pb2.py b/modules/text/text_generation/ernie_gen/propeller/service/interface_pb2.py new file mode 100644 index 00000000..4509705b --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/service/interface_pb2.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: interface.proto + +import sys +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor.FileDescriptor( + name='interface.proto', + package='interface', + syntax='proto3', + serialized_options=None, + serialized_pb=_b( + '\n\x0finterface.proto\x12\tinterface\"\'\n\x05Slots\x12\x1e\n\x05slots\x18\x01 \x03(\x0b\x32\x0f.interface.Slot\"\xb8\x01\n\x04Slot\x12\"\n\x04type\x18\x01 \x01(\x0e\x32\x14.interface.Slot.Type\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"p\n\x04Type\x12\x08\n\x04\x42OOL\x10\x00\x12\t\n\x05INT16\x10\x01\x12\t\n\x05INT32\x10\x02\x12\t\n\x05INT64\x10\x03\x12\x08\n\x04\x46P16\x10\x04\x12\x08\n\x04\x46P32\x10\x05\x12\x08\n\x04\x46P64\x10\x06\x12\n\n\x06SIZE_T\x10\x13\x12\t\n\x05UINT8\x10\x14\x12\x08\n\x04INT8\x10\x15\x32:\n\tInference\x12-\n\x05Infer\x12\x10.interface.Slots\x1a\x10.interface.Slots\"\x00\x62\x06proto3' + )) + +_SLOT_TYPE = _descriptor.EnumDescriptor( + name='Type', + full_name='interface.Slot.Type', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor(name='BOOL', index=0, number=0, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='INT16', index=1, number=1, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='INT32', index=2, number=2, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='INT64', index=3, number=3, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='FP16', index=4, number=4, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='FP32', index=5, number=5, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='FP64', index=6, number=6, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='SIZE_T', index=7, number=19, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='UINT8', index=8, number=20, serialized_options=None, type=None), + _descriptor.EnumValueDescriptor(name='INT8', index=9, number=21, serialized_options=None, type=None), + ], + containing_type=None, + serialized_options=None, + serialized_start=144, + serialized_end=256, +) 
+_sym_db.RegisterEnumDescriptor(_SLOT_TYPE) + +_SLOTS = _descriptor.Descriptor( + name='Slots', + full_name='interface.Slots', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='slots', + full_name='interface.Slots.slots', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=30, + serialized_end=69, +) + +_SLOT = _descriptor.Descriptor( + name='Slot', + full_name='interface.Slot', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='type', + full_name='interface.Slot.type', + index=0, + number=1, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='dims', + full_name='interface.Slot.dims', + index=1, + number=2, + type=3, + cpp_type=2, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='data', + full_name='interface.Slot.data', + index=2, + number=3, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR), + ], + extensions=[], + nested_types=[], + enum_types=[ + _SLOT_TYPE, + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[], + serialized_start=72, + serialized_end=256, +) + +_SLOTS.fields_by_name['slots'].message_type = _SLOT +_SLOT.fields_by_name['type'].enum_type = _SLOT_TYPE +_SLOT_TYPE.containing_type = _SLOT +DESCRIPTOR.message_types_by_name['Slots'] = _SLOTS +DESCRIPTOR.message_types_by_name['Slot'] = _SLOT +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Slots = _reflection.GeneratedProtocolMessageType( + 'Slots', + (_message.Message, ), + { + 'DESCRIPTOR': _SLOTS, + '__module__': 'interface_pb2' + # @@protoc_insertion_point(class_scope:interface.Slots) + }) +_sym_db.RegisterMessage(Slots) + +Slot = _reflection.GeneratedProtocolMessageType( + 'Slot', + (_message.Message, ), + { + 'DESCRIPTOR': _SLOT, + '__module__': 'interface_pb2' + # @@protoc_insertion_point(class_scope:interface.Slot) + }) +_sym_db.RegisterMessage(Slot) + +_INFERENCE = _descriptor.ServiceDescriptor( + name='Inference', + full_name='interface.Inference', + file=DESCRIPTOR, + index=0, + serialized_options=None, + serialized_start=258, + serialized_end=316, + methods=[ + _descriptor.MethodDescriptor( + name='Infer', + full_name='interface.Inference.Infer', + index=0, + containing_service=None, + input_type=_SLOTS, + output_type=_SLOTS, + serialized_options=None, + ), + ]) +_sym_db.RegisterServiceDescriptor(_INFERENCE) + +DESCRIPTOR.services_by_name['Inference'] = _INFERENCE + +# @@protoc_insertion_point(module_scope) diff --git 
a/modules/text/text_generation/ernie_gen/propeller/service/server.py b/modules/text/text_generation/ernie_gen/propeller/service/server.py new file mode 100644 index 00000000..161cd02a --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/service/server.py @@ -0,0 +1,182 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Never Never Never import paddle.fluid in main process, or any module would import fluid. +""" + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import os +import logging +import six +from time import sleep, time +import multiprocessing + +import zmq + +log = logging.getLogger(__name__) + + +def _profile(msg): + def _decfn(fn): + def _retfn(*args, **kwargs): + start = time() + ret = fn(*args, **kwargs) + end = time() + log.debug('%s timecost: %.5f' % (msg, end - start)) + return ret + + return _retfn + + return _decfn + + +class Predictor(object): + """paddle predictor wrapper""" + + def __init__(self, model_dir, device_idx=0): + import paddle.fluid as F + log.debug('create predictor on card %d' % device_idx) + config = F.core.AnalysisConfig(model_dir) + config.enable_use_gpu(5000, device_idx) + self._predictor = F.core.create_paddle_predictor(config) + + @_profile('paddle') + def __call__(self, args): + for i, a in enumerate(args): + a.name = 'placeholder_%d' % i + res = self._predictor.run(args) + return res + + +def run_worker(model_dir, device_idx, endpoint="ipc://worker.ipc"): + """worker process entrence""" + try: + log.debug("run_worker %s" % device_idx) + os.environ["CUDA_VISIBLE_DEVICES"] = os.getenv("CUDA_VISIBLE_DEVICES").split(",")[device_idx] + log.debug('cuda_env %s' % os.environ["CUDA_VISIBLE_DEVICES"]) + import paddle.fluid as F + from ernie_gen.propeller.service import interface_pb2 + import ernie_gen.propeller.service.utils as serv_utils + context = zmq.Context() + socket = context.socket(zmq.REP) + socket.connect(endpoint) + #socket.bind(endpoint) + log.debug("Predictor building %s" % device_idx) + predictor = Predictor(model_dir, 0) + log.debug("Predictor %s" % device_idx) + except Exception as e: + log.exception(e) + + while True: + # Wait for next request from client + try: + message = socket.recv() + log.debug("get message %s" % device_idx) + slots = interface_pb2.Slots() + slots.ParseFromString(message) + pts = [serv_utils.slot_to_paddlearray(s) for s in slots.slots] + ret = predictor(pts) + slots = interface_pb2.Slots(slots=[serv_utils.paddlearray_to_slot(r) for r in ret]) + socket.send(slots.SerializeToString()) + except Exception as e: + log.exception(e) + socket.send(e.message) + + +class InferencePredictor(object): + """control Predictor for multi gpu card""" + + def __init__(self, backend_addr, model_dir, n_devices=1): + self.backend_addr = backend_addr + self.model_dir = model_dir + self.n_devices = n_devices + self.children = [] + + def start(self): + 
"""doc""" + for device_idx in range(self.n_devices): + p = multiprocessing.Process(target=run_worker, args=(self.model_dir, device_idx, self.backend_addr)) + p.start() + self.children.append(p) + return self + + def join(self): + """doc""" + for p in self.children: + p.join() + + def term(self): + """doc""" + for p in self.children: + log.debug("terminating children %s" % repr(p)) + p.terminate() + + +class InferenceProxy(object): + """zmq proxy""" + + def __init__(self): + """doc""" + self.backend = None + self.frontend = None + + def listen(self, frontend_addr, backend_addr): + """doc""" + log.info("InferenceProxy starting...") + try: + context = zmq.Context(1) + # Socket facing clients + self.frontend = context.socket(zmq.ROUTER) + self.frontend.bind(frontend_addr) + # Socket facing services + self.backend = context.socket(zmq.DEALER) + self.backend.bind(backend_addr) + log.info("Queue init done") + zmq.device(zmq.QUEUE, self.frontend, self.backend) + except Exception as e: + log.exception(e) + log.info("Bringing down zmq device") + finally: + log.debug('terminating proxy') + if self.frontend is not None: + self.frontend.close() + if self.backend is not None: + self.backend.close() + context.term() + + +class InferenceServer(object): + """start InferencePredictor and InferenceProxy""" + + def __init__(self, model_dir, n_devices): + """doc""" + self.model_dir = model_dir + self.n_devices = n_devices + + def listen(self, port): + """doc""" + frontend_addr = "tcp://*:%s" % port + backend_addr = "ipc://backend.ipc" + predictor = InferencePredictor(backend_addr, self.model_dir, self.n_devices).start() + try: + proxy = InferenceProxy() + proxy.listen(frontend_addr, backend_addr) + predictor.join() + except KeyboardInterrupt: + log.debug('terminating server') + predictor.term() diff --git a/modules/text/text_generation/ernie_gen/propeller/service/utils.py b/modules/text/text_generation/ernie_gen/propeller/service/utils.py new file mode 100644 index 00000000..25d06249 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/service/utils.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""utils for server""" + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import struct + +from ernie_gen.propeller.service import interface_pb2 + + +def slot_to_numpy(slot): + """doc""" + if slot.type == interface_pb2.Slot.FP32: + dtype = np.float32 + type_str = 'f' + elif slot.type == interface_pb2.Slot.INT32: + type_str = 'i' + dtype = np.int32 + elif slot.type == interface_pb2.Slot.INT64: + dtype = np.int64 + type_str = 'q' + else: + raise RuntimeError('know type %s' % slot.type) + num = len(slot.data) // struct.calcsize(type_str) + arr = struct.unpack('%d%s' % (num, type_str), slot.data) + shape = slot.dims + ret = np.array(arr, dtype=dtype).reshape(shape) + return ret + + +def numpy_to_slot(arr): + """doc""" + if arr.dtype == np.float32: + dtype = interface_pb2.Slot.FP32 + elif arr.dtype == np.int32: + dtype = interface_pb2.Slot.INT32 + elif arr.dtype == np.int64: + dtype = interface_pb2.Slot.INT64 + else: + raise RuntimeError('know type %s' % arr.dtype) + pb = interface_pb2.Slot(type=dtype, dims=list(arr.shape), data=arr.tobytes()) + return pb + + +def slot_to_paddlearray(slot): + """doc""" + import paddle.fluid.core as core + if slot.type == interface_pb2.Slot.FP32: + dtype = np.float32 + type_str = 'f' + elif slot.type == interface_pb2.Slot.INT32: + dtype = np.int32 + type_str = 'i' + elif slot.type == interface_pb2.Slot.INT64: + dtype = np.int64 + type_str = 'q' + else: + raise RuntimeError('know type %s' % slot.type) + num = len(slot.data) // struct.calcsize(type_str) + arr = struct.unpack('%d%s' % (num, type_str), slot.data) + ret = core.PaddleTensor(data=np.array(arr, dtype=dtype).reshape(slot.dims)) + return ret + + +def paddlearray_to_slot(arr): + """doc""" + import paddle.fluid.core as core + if arr.dtype == core.PaddleDType.FLOAT32: + dtype = interface_pb2.Slot.FP32 + type_str = 'f' + arr_data = arr.data.float_data() + elif arr.dtype == core.PaddleDType.INT32: + dtype = interface_pb2.Slot.INT32 + type_str = 'i' + arr_data = arr.data.int32_data() + elif arr.dtype == core.PaddleDType.INT64: + dtype = interface_pb2.Slot.INT64 + type_str = 'q' + arr_data = arr.data.int64_data() + else: + raise RuntimeError('know type %s' % arr.dtype) + data = struct.pack('%d%s' % (len(arr_data), type_str), *arr_data) + pb = interface_pb2.Slot(type=dtype, dims=list(arr.shape), data=data) + return pb + + +def nparray_list_serialize(arr_list): + """doc""" + slot_list = [numpy_to_slot(arr) for arr in arr_list] + slots = interface_pb2.Slots(slots=slot_list) + return slots.SerializeToString() + + +def nparray_list_deserialize(string): + """doc""" + slots = interface_pb2.Slots() + slots.ParseFromString(string) + return [slot_to_numpy(slot) for slot in slots.slots] diff --git a/modules/text/text_generation/ernie_gen/propeller/tools/__init__.py b/modules/text/text_generation/ernie_gen/propeller/tools/__init__.py new file mode 100644 index 00000000..d0c32e26 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/tools/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py b/modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py new file mode 100644 index 00000000..9ba8a9e6 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py @@ -0,0 +1,116 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import os +import struct +import logging +import argparse +import numpy as np +import collections +from distutils import dir_util +import pickle + +import paddle.fluid as F +from paddle.fluid.proto import framework_pb2 + +log = logging.getLogger(__name__) +formatter = logging.Formatter(fmt='[%(levelname)s] %(asctime)s [%(filename)12s:%(lineno)5d]:\t%(message)s') +console = logging.StreamHandler() +console.setFormatter(formatter) +log.addHandler(console) +log.setLevel(logging.DEBUG) + + +def gen_arr(data, dtype): + num = len(data) // struct.calcsize(dtype) + arr = struct.unpack('%d%s' % (num, dtype), data) + return arr + + +def parse(filename): + with open(filename, 'rb') as f: + read = lambda fmt: struct.unpack(fmt, f.read(struct.calcsize(fmt))) + _, = read('I') # version + lodsize, = read('Q') + if lodsize != 0: + log.warning('shit, it is LOD tensor!!! 
skipped!!') + return None + _, = read('I') # version + pbsize, = read('i') + data = f.read(pbsize) + proto = framework_pb2.VarType.TensorDesc() + proto.ParseFromString(data) + log.info('type: [%s] dim %s' % (proto.data_type, proto.dims)) + if proto.data_type == framework_pb2.VarType.FP32: + arr = np.array(gen_arr(f.read(), 'f'), dtype=np.float32).reshape(proto.dims) + elif proto.data_type == framework_pb2.VarType.INT64: + arr = np.array(gen_arr(f.read(), 'q'), dtype=np.int64).reshape(proto.dims) + elif proto.data_type == framework_pb2.VarType.INT32: + arr = np.array(gen_arr(f.read(), 'i'), dtype=np.int32).reshape(proto.dims) + elif proto.data_type == framework_pb2.VarType.INT8: + arr = np.array(gen_arr(f.read(), 'B'), dtype=np.int8).reshape(proto.dims) + elif proto.data_type == framework_pb2.VarType.FP16: + arr = np.array(gen_arr(f.read(), 'H'), dtype=np.uint16).view(np.float16).reshape(proto.dims) + else: + raise RuntimeError('Unknown dtype %s' % proto.data_type) + + return arr + + +def show(arr): + print(repr(arr)) + + +def dump(arr, path): + path = os.path.join(args.to, path) + log.info('dump to %s' % path) + try: + os.makedirs(os.path.dirname(path)) + except FileExistsError: + pass + pickle.dump(arr, open(path, 'wb'), protocol=4) + + +def list_dir(dir_or_file): + if os.path.isfile(dir_or_file): + return [dir_or_file] + else: + return [os.path.join(i, kk) for i, _, k in os.walk(dir_or_file) for kk in k] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('mode', choices=['show', 'dump'], type=str) + parser.add_argument('file_or_dir', type=str) + parser.add_argument('-t', "--to", type=str, default=None) + parser.add_argument('-v', "--verbose", action='store_true') + args = parser.parse_args() + + files = list_dir(args.file_or_dir) + parsed_arr = map(parse, files) + if args.mode == 'show': + for arr in parsed_arr: + if arr is not None: + show(arr) + elif args.mode == 'dump': + if args.to is None: + raise ValueError('--to dir_name not specified') + for arr, path in zip(parsed_arr, files): + if arr is not None: + dump(arr, path.replace(args.file_or_dir, '')) diff --git a/modules/text/text_generation/ernie_gen/propeller/tools/start_server.py b/modules/text/text_generation/ernie_gen/propeller/tools/start_server.py new file mode 100644 index 00000000..58e45af6 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/tools/start_server.py @@ -0,0 +1,39 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
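
For reference, parse() above decodes the plain (non-LoD) parameter layout: a u32 version, a u64 LoD-level count, another u32 version, an i32 protobuf length, a framework_pb2.VarType.TensorDesc, then the raw buffer. A minimal sketch (not part of the patch) that writes such a file and feeds it back through parse(); the file name is arbitrary and the TensorDesc usage mirrors what parse() itself relies on:

    import struct
    import numpy as np
    from paddle.fluid.proto import framework_pb2

    desc = framework_pb2.VarType.TensorDesc()
    desc.data_type = framework_pb2.VarType.FP32
    desc.dims.extend([2, 3])
    body = desc.SerializeToString()

    with open('fake_param', 'wb') as f:
        f.write(struct.pack('I', 0))          # version
        f.write(struct.pack('Q', 0))          # LoD levels (0 = plain tensor)
        f.write(struct.pack('I', 0))          # tensor version
        f.write(struct.pack('i', len(body)))  # TensorDesc length
        f.write(body)
        f.write(np.arange(6, dtype=np.float32).tobytes())

    print(parse('fake_param'))                # -> a (2, 3) float32 array
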
+from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import sys +import os +import argparse +import logging +import logging.handlers +from ernie_gen.propeller.service.server import InferenceServer +from ernie_gen.propeller import log + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-m', '--model_dir', type=str, required=True) + parser.add_argument('-p', '--port', type=int, required=True) + parser.add_argument('-v', '--verbose', action='store_true') + args = parser.parse_args() + + if args.verbose: + log.setLevel(logging.DEBUG) + n_devices = len(os.getenv("CUDA_VISIBLE_DEVICES").split(",")) + server = InferenceServer(args.model_dir, n_devices) + log.info('propeller server listent on port %d' % args.port) + server.listen(args.port) diff --git a/modules/text/text_generation/ernie_gen/propeller/train/__init__.py b/modules/text/text_generation/ernie_gen/propeller/train/__init__.py new file mode 100644 index 00000000..31701fc0 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/train/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +doc +""" diff --git a/modules/text/text_generation/ernie_gen/propeller/train/model.py b/modules/text/text_generation/ernie_gen/propeller/train/model.py new file mode 100644 index 00000000..a920cae6 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/train/model.py @@ -0,0 +1,88 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
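
To make the abstract template below concrete: a minimal sketch (not part of the patch) of a subclass wired the way _build_model_fn earlier in this patch drives it (constructed as model_class(params, mode, run_config=...), with forward/loss/backward/metrics called per RunMode). BowClassifier and the layer sizes are hypothetical, paddle.fluid 1.x static-graph layers are assumed, and metrics.Acc with its import path stands in for whichever propeller metric is actually available:

    import paddle.fluid as F
    import paddle.fluid.layers as L
    from ernie_gen.propeller import metrics
    from ernie_gen.propeller.train.model import Model

    class BowClassifier(Model):
        def __init__(self, config, mode, run_config=None):
            self.mode = mode
            self.hidden = config['hidden_size']

        def forward(self, features):
            ids, = features                   # label is stripped off by _build_model_fn
            emb = L.embedding(ids, size=[30000, self.hidden])
            pooled = L.reduce_mean(emb, dim=1)
            return L.fc(pooled, size=2, act='softmax')

        def loss(self, predictions, label):
            return L.reduce_mean(L.cross_entropy(predictions, label))

        def backward(self, loss):
            F.optimizer.Adam(learning_rate=1e-4).minimize(loss)

        def metrics(self, predictions, label):
            return {'acc': metrics.Acc(label, L.argmax(predictions, axis=1))}
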
+""" +Model template +""" + +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import sys +import six +import logging +import os +import itertools +import json +import abc +import numpy as np + + +@six.add_metaclass(abc.ABCMeta) +class Model(object): + """ + template + """ + + def __init__(self, config, mode): + """ + Args: + config (dict): hyper param + mode (propeller.RunMode): will creat `TRAIN` and `EVAL` model in propeller.train_and_eval + """ + self.mode = mode + + @abc.abstractmethod + def forward(self, features): + """ + Args: + features (list of Tensor): inputs features that depends on your Dataset.output_shapes + Returns: + return (Tensor): prediction + """ + pass + + @abc.abstractmethod + def loss(self, predictions, label): + """ + Args: + predictions (Tensor): result of `self.forward` + label (Tensor): depends on your Dataset.output_shapes + Returns: + return (paddle scalar): loss + """ + pass + + @abc.abstractmethod + def backward(self, loss): + """ + Call in TRAIN mode + Args: + loss (Tensor): result of `self.loss` + Returns: + None + """ + pass + + @abc.abstractmethod + def metrics(self, predictions, label): + """ + Call in EVAL mode + Args: + predictions (Tensor): result of `self.forward` + label (Tensor): depends on your Dataset.output_shapes + Returns: + (dict): k-v map like: {"metrics_name": propeller.Metrics } + """ + return {} diff --git a/modules/text/text_generation/ernie_gen/propeller/types.py b/modules/text/text_generation/ernie_gen/propeller/types.py new file mode 100644 index 00000000..c30758ad --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/types.py @@ -0,0 +1,118 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Basic types""" + +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import os +import json +from collections import namedtuple + + +class RunMode(object): + """model_fn will be called in 3 modes""" + TRAIN = 1 + PREDICT = 2 + EVAL = 3 + + +class HParams(object): + """Hyper paramerter""" + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + self.__dict__[k] = v + + def __contains__(self, key): + return key in self.__dict__ + + def __getitem__(self, key): + if key not in self.__dict__: + raise ValueError('key(%s) not in HParams.' 
% key) + return self.__dict__[key] + + def __repr__(self): + return repr(self.to_dict()) + + def __setitem__(self, key, val): + self.__dict__[key] = val + + @classmethod + def from_json(cls, json_str): + """doc""" + d = json.loads(json_str) + if type(d) != dict: + raise ValueError('json object must be dict.') + return HParams.from_dict(d) + + def get(self, key, default=None): + """doc""" + return self.__dict__.get(key, default) + + @classmethod + def from_dict(cls, d): + """doc""" + if type(d) != dict: + raise ValueError('input must be dict.') + hp = HParams(**d) + return hp + + def to_json(self): + """doc""" + return json.dumps(self.__dict__) + + def to_dict(self): + """doc""" + return self.__dict__ + + def join(self, other): + """doc""" + if not isinstance(other, HParams): + raise ValueError('input must be HParams instance. got %s' % type(other)) + self.__dict__.update(**other.__dict__) + return self + + +SummaryRecord = namedtuple('SummaryRecord', ['scalar', 'histogram']) + +WarmStartSetting = namedtuple('WarmStartSetting', ['predicate_fn', 'from_dir']) +TextoneWarmStartSetting = namedtuple('TextoneWarmStartSetting', ['from_dir']) + +RunConfig = namedtuple('RunConfig', [ + 'model_dir', 'run_steps', 'max_steps', 'save_steps', 'eval_steps', 'eval_max_steps', 'skip_steps', 'log_steps', + 'max_ckpt', 'shit' +]) +RunConfig.__new__.__defaults__ = (None, ) * len(RunConfig._fields) + +ProgramPair = namedtuple('ProgramPair', ['train_program', 'startup_program']) + +InferenceSpec = namedtuple('InferenceSpec', ['inputs', 'outputs']) + +ModelSpec = namedtuple('ModelSpec', [ + 'loss', + 'predictions', + 'metrics', + 'mode', + 'inference_spec', + 'train_hooks', + 'eval_hooks', +]) +ModelSpec.__new__.__defaults__ = (None, ) * len(ModelSpec._fields) + + +class StopException(Exception): + """doc""" + pass diff --git a/modules/text/text_generation/ernie_gen/propeller/util.py b/modules/text/text_generation/ernie_gen/propeller/util.py new file mode 100644 index 00000000..53f7d789 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/propeller/util.py @@ -0,0 +1,126 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
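
A minimal sketch (not part of the patch) of how the HParams and RunConfig types defined above are typically built; the field values are arbitrary placeholders:

    from ernie_gen.propeller.types import HParams, RunConfig

    hp = HParams.from_json('{"hidden_size": 768, "lr": 1e-4}')
    hp = hp.join(HParams(batch_size=32))          # join() merges; later keys win
    assert hp['hidden_size'] == 768
    assert hp.get('dropout', 0.1) == 0.1          # missing keys fall back to the default

    cfg = RunConfig(model_dir='./output', max_steps=100000,
                    save_steps=10000, eval_steps=1000,
                    skip_steps=0, log_steps=10)   # unspecified fields default to None
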
+"""global utils""" +from __future__ import print_function +from __future__ import absolute_import +from __future__ import unicode_literals + +import os +import six +import re +import json +import argparse +import itertools +import logging +from functools import reduce + +from ernie_gen.propeller.types import RunConfig +from ernie_gen.propeller.types import HParams + +log = logging.getLogger(__name__) + + +def ArgumentParser(name): + """predefined argparser""" + parser = argparse.ArgumentParser('propeller model') + parser.add_argument('--run_config', type=str, default='') + parser.add_argument('--hparam', type=str, nargs='*', action='append', default=[['']]) + return parser + + +def _get_dict_from_environ_or_json_or_file(args, env_name): + if args == '': + return None + if args is None: + s = os.environ.get(env_name) + else: + s = args + if os.path.exists(s): + s = open(s).read() + if isinstance(s, six.string_types): + try: + r = json.loads(s) + except ValueError: + try: + r = eval(s) + except SyntaxError as e: + raise ValueError('json parse error: %s \n>Got json: %s' % (repr(e), s)) + return r + else: + return s #None + + +def parse_file(filename): + """useless api""" + d = _get_dict_from_environ_or_json_or_file(filename, None) + if d is None: + raise ValueError('file(%s) not found' % filename) + return d + + +def parse_runconfig(args=None): + """get run_config from env or file""" + d = _get_dict_from_environ_or_json_or_file(args.run_config, 'PROPELLER_RUNCONFIG') + if d is None: + raise ValueError('run_config not found') + return RunConfig(**d) + + +def parse_hparam(args=None): + """get hparam from env or file""" + if args is not None: + hparam_strs = reduce(list.__add__, args.hparam) + else: + hparam_strs = [None] + + hparams = [_get_dict_from_environ_or_json_or_file(hp, 'PROPELLER_HPARAMS') for hp in hparam_strs] + hparams = [HParams(**h) for h in hparams if h is not None] + if len(hparams) == 0: + return HParams() + else: + hparam = reduce(lambda x, y: x.join(y), hparams) + return hparam + + +def flatten(s): + """doc""" + assert is_struture(s) + schema = [len(ss) for ss in s] + flt = list(itertools.chain(*s)) + return flt, schema + + +def unflatten(structure, schema): + """doc""" + start = 0 + res = [] + for _range in schema: + res.append(structure[start:start + _range]) + start += _range + return res + + +def is_struture(s): + """doc""" + return isinstance(s, list) or isinstance(s, tuple) + + +def map_structure(func, s): + """same sa tf.map_structure""" + if isinstance(s, list) or isinstance(s, tuple): + return [map_structure(func, ss) for ss in s] + elif isinstance(s, dict): + return {k: map_structure(func, v) for k, v in six.iteritems(s)} + else: + return func(s) diff --git a/modules/text/text_generation/ernie_gen/template/__init__.py b/modules/text/text_generation/ernie_gen/template/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/text/text_generation/ernie_gen/template/assets/ernie_config.json b/modules/text/text_generation/ernie_gen/template/assets/ernie_config.json new file mode 100644 index 00000000..1f8c5930 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/assets/ernie_config.json @@ -0,0 +1,12 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "relu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "max_position_embeddings": 513, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 18000 +} diff --git 
a/modules/text/text_generation/ernie_gen/template/assets/vocab.txt b/modules/text/text_generation/ernie_gen/template/assets/vocab.txt new file mode 100644 index 00000000..5db20b3b --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/assets/vocab.txt @@ -0,0 +1,17964 @@ +[PAD] +[CLS] +[SEP] +[MASK] +, +的 +、 +一 +人 +有 +是 +在 +中 +为 +和 +了 +不 +年 +学 +大 +国 +生 +以 +“ +” +作 +业 +个 +上 +用 +, +地 +会 +成 +发 +工 +时 +于 +理 +出 +行 +要 +. +等 +他 +到 +之 +这 +可 +后 +家 +对 +能 +公 +与 +》 +《 +主 +方 +分 +经 +来 +全 +其 +部 +多 +产 +自 +文 +高 +动 +进 +法 +化 +: +我 +面 +) +( +实 +教 +建 +体 +而 +长 +子 +下 +现 +开 +本 +力 +定 +性 +过 +设 +合 +小 +同 +机 +市 +品 +水 +新 +内 +事 +也 +种 +及 +制 +入 +所 +心 +务 +就 +管 +们 +得 +展 +重 +民 +加 +区 +物 +者 +通 +天 +政 +三 +电 +关 +度 +第 +名 +术 +最 +系 +月 +外 +资 +日 +代 +员 +如 +间 +位 +并 +书 +科 +村 +应 +量 +道 +前 +当 +无 +里 +相 +平 +从 +计 +提 +保 +任 +程 +技 +都 +研 +十 +基 +特 +好 +被 +或 +目 +将 +使 +山 +二 +说 +数 +点 +明 +情 +元 +着 +收 +组 +然 +美 +各 +由 +场 +金 +形 +农 +期 +因 +表 +此 +色 +起 +还 +立 +世 +安 +活 +专 +质 +1 +规 +社 +万 +信 +西 +统 +结 +路 +利 +次 +南 +式 +意 +级 +常 +师 +校 +你 +育 +果 +究 +司 +服 +门 +海 +导 +流 +项 +她 +总 +处 +两 +传 +东 +正 +省 +院 +户 +手 +具 +2 +原 +强 +北 +向 +先 +但 +米 +城 +企 +件 +风 +军 +身 +更 +知 +已 +气 +战 +至 +单 +口 +集 +创 +解 +四 +标 +交 +比 +商 +论 +界 +题 +变 +花 +3 +改 +类 +运 +指 +型 +调 +女 +神 +接 +造 +受 +广 +只 +委 +去 +共 +治 +达 +持 +条 +网 +头 +构 +县 +些 +该 +又 +那 +想 +样 +办 +济 +5 +格 +责 +车 +很 +施 +求 +己 +光 +精 +林 +完 +爱 +线 +参 +少 +积 +清 +看 +优 +报 +王 +直 +没 +每 +据 +游 +效 +感 +五 +影 +别 +获 +领 +称 +选 +供 +乐 +老 +么 +台 +问 +划 +带 +器 +源 +织 +放 +深 +备 +视 +白 +功 +取 +装 +营 +见 +记 +环 +队 +节 +准 +石 +它 +回 +历 +负 +真 +增 +医 +联 +做 +职 +容 +士 +包 +义 +观 +团 +病 +4 +府 +息 +则 +考 +料 +华 +州 +语 +证 +整 +让 +江 +史 +空 +验 +需 +支 +命 +给 +离 +认 +艺 +较 +土 +古 +养 +才 +境 +推 +把 +均 +图 +际 +斯 +近 +片 +局 +修 +字 +德 +权 +步 +始 +复 +转 +协 +即 +打 +画 +投 +决 +何 +约 +反 +quot +费 +议 +护 +极 +河 +房 +查 +布 +思 +干 +价 +儿 +非 +马 +党 +奖 +模 +故 +编 +音 +范 +识 +率 +存 +引 +客 +属 +评 +采 +尔 +配 +镇 +室 +再 +案 +监 +习 +注 +根 +克 +演 +食 +族 +示 +球 +状 +青 +号 +张 +百 +素 +首 +易 +热 +阳 +今 +园 +防 +版 +太 +乡 +英 +6 +材 +列 +便 +写 +住 +置 +层 +助 +确 +试 +难 +承 +象 +居 +10 +黄 +快 +断 +维 +却 +红 +速 +连 +众 +0 +细 +态 +话 +周 +言 +药 +培 +血 +亩 +龙 +越 +值 +几 +边 +读 +未 +曾 +测 +算 +京 +景 +余 +站 +低 +温 +消 +必 +切 +依 +随 +且 +志 +卫 +域 +照 +许 +限 +著 +销 +落 +足 +适 +争 +策 +8 +控 +武 +按 +7 +初 +角 +核 +死 +检 +富 +满 +显 +审 +除 +致 +亲 +占 +失 +星 +章 +善 +续 +千 +叶 +火 +副 +告 +段 +什 +声 +终 +况 +走 +木 +益 +戏 +独 +纪 +植 +财 +群 +六 +赛 +远 +拉 +亚 +密 +排 +超 +像 +课 +围 +往 +响 +击 +疗 +念 +八 +云 +险 +律 +请 +革 +诗 +批 +底 +压 +双 +男 +训 +例 +汉 +升 +拥 +势 +酒 +眼 +官 +牌 +油 +曲 +友 +望 +黑 +歌 +筑 +础 +香 +仅 +担 +括 +湖 +严 +秀 +剧 +九 +举 +执 +充 +兴 +督 +博 +草 +般 +李 +健 +喜 +授 +普 +预 +灵 +突 +良 +款 +罗 +9 +微 +七 +录 +朝 +飞 +宝 +令 +轻 +劳 +距 +异 +简 +兵 +树 +序 +候 +含 +福 +尽 +留 +20 +丰 +旅 +征 +临 +破 +移 +篇 +抗 +典 +端 +苏 +奇 +止 +康 +店 +毛 +觉 +春 +售 +络 +降 +板 +坚 +母 +讲 +早 +印 +略 +孩 +夫 +藏 +铁 +害 +互 +帝 +田 +融 +皮 +宗 +岁 +载 +析 +斗 +须 +伤 +12 +介 +另 +00 +半 +班 +馆 +味 +楼 +卡 +射 +述 +杀 +波 +绿 +免 +兰 +绝 +刻 +短 +察 +输 +择 +综 +杂 +份 +纳 +父 +词 +银 +送 +座 +左 +继 +固 +宣 +厂 +肉 +换 +补 +税 +派 +套 +欢 +播 +吸 +圆 +攻 +阿 +购 +听 +右 +减 +激 +巴 +背 +够 +遇 +智 +玉 +找 +宽 +陈 +练 +追 +毕 +彩 +软 +帮 +股 +荣 +托 +予 +佛 +堂 +障 +皇 +若 +守 +似 +届 +待 +货 +散 +额 +30 +尚 +穿 +丽 +骨 +享 +差 +针 +索 +稳 +宁 +贵 +酸 +液 +唐 +操 +探 +玩 +促 +笔 +库 +救 +虽 +久 +闻 +顶 +床 +港 +鱼 +亿 +登 +11 +永 +毒 +桥 +冷 +魔 +秘 +陆 +您 +童 +归 +侧 +沙 +染 +封 +紧 +松 +川 +刘 +15 +雄 +希 +毫 +卷 +某 +季 +菜 +庭 +附 +逐 +夜 +宫 +洲 +退 +顾 +尼 +胜 +剂 +纯 +舞 +遗 +苦 +梦 +挥 +航 +愿 +街 +招 +矿 +夏 +盖 +献 +怎 +茶 +申 +39 +吧 +脑 +亦 +吃 +频 +宋 +央 +威 +厚 +块 +冲 +叫 +熟 +礼 +厅 +否 +渐 +笑 +钱 +钟 +甚 +牛 +丝 +靠 +岛 +绍 +盘 +缘 +聚 +静 +雨 +氏 +圣 +顺 +唱 +刊 +阶 +困 +急 +饰 +弹 +庄 +既 +野 +阴 +混 +饮 +损 +齐 +末 +错 +轮 +宜 +鲜 +兼 +敌 +粉 +祖 +延 +100 +钢 +辑 +欧 +硬 +甲 +诉 +册 +痛 +订 +缺 +晚 +衣 +佳 +脉 +gt +盛 +乎 +拟 +贸 +扩 +船 +仪 +谁 +警 +50 +停 +席 +竞 +释 +庆 +汽 +仍 +掌 +诸 +仙 +弟 +吉 +洋 +奥 +票 +危 +架 +买 +径 +塔 +休 +付 +恶 +雷 +怀 +秋 +借 +巨 +透 +誉 +厘 +句 +跟 +胞 +婚 +幼 +烈 +峰 +寻 +君 +汇 +趣 +纸 +假 +肥 +患 +杨 +雅 +罪 +谓 +亮 +脱 +寺 +烟 
+判 +绩 +乱 +刚 +摄 +洞 +践 +码 +启 +励 +呈 +曰 +呢 +符 +哥 +媒 +疾 +坐 +雪 +孔 +倒 +旧 +菌 +岩 +鼓 +亡 +访 +症 +暗 +湾 +幸 +池 +讨 +努 +露 +吗 +繁 +途 +殖 +败 +蛋 +握 +刺 +耕 +洗 +沉 +概 +哈 +泛 +凡 +残 +隐 +虫 +朋 +虚 +餐 +殊 +慢 +询 +蒙 +孙 +谈 +鲁 +裂 +贴 +污 +漫 +谷 +违 +泉 +拿 +森 +横 +扬 +键 +膜 +迁 +尤 +涉 +净 +诚 +折 +冰 +械 +拍 +梁 +沿 +避 +吴 +惊 +犯 +灭 +湿 +迷 +姓 +阅 +灯 +妇 +触 +冠 +答 +俗 +档 +尊 +谢 +措 +筹 +竟 +韩 +签 +剑 +鉴 +灾 +贯 +迹 +洛 +沟 +束 +翻 +巧 +坏 +弱 +零 +壁 +枝 +映 +恩 +抓 +屋 +呼 +脚 +绘 +40 +淡 +辖 +2010 +伊 +粒 +欲 +震 +伯 +私 +蓝 +甘 +储 +胡 +卖 +梅 +16 +耳 +疑 +润 +伴 +泽 +牧 +烧 +尾 +累 +糖 +怪 +唯 +莫 +粮 +柱 +18 +竹 +灰 +岸 +缩 +井 +伦 +柔 +盟 +珠 +丹 +amp +皆 +哪 +迎 +颜 +衡 +啊 +塑 +寒 +13 +紫 +镜 +25 +氧 +误 +伍 +彻 +刀 +览 +炎 +津 +耐 +秦 +尖 +潮 +描 +浓 +召 +禁 +阻 +胶 +译 +腹 +泰 +乃 +盐 +潜 +鸡 +诺 +遍 +2000 +纹 +冬 +牙 +麻 +辅 +猪 +弃 +楚 +羊 +晋 +14 +鸟 +赵 +洁 +谋 +隆 +滑 +60 +2008 +籍 +臣 +朱 +泥 +墨 +辆 +墙 +浪 +姐 +赏 +纵 +2006 +拔 +倍 +纷 +摩 +壮 +苗 +偏 +塞 +贡 +仁 +宇 +卵 +瓦 +枪 +覆 +殿 +刑 +贫 +妈 +幅 +幕 +忆 +丁 +估 +废 +萨 +舍 +详 +旗 +岗 +洪 +80 +贝 +2009 +迅 +凭 +勇 +雕 +奏 +旋 +杰 +煤 +阵 +乘 +溪 +奉 +畜 +挑 +昌 +硕 +庙 +惠 +薄 +逃 +爆 +哲 +浙 +珍 +炼 +栏 +暴 +币 +隔 +吨 +倾 +嘉 +址 +陶 +绕 +诊 +遭 +桃 +魂 +兽 +豆 +闲 +箱 +拓 +燃 +裁 +晶 +掉 +脂 +溶 +顿 +肤 +虑 +鬼 +2007 +灌 +徐 +龄 +陵 +恋 +侵 +坡 +寿 +勤 +磨 +妹 +瑞 +缓 +轴 +麦 +羽 +咨 +凝 +默 +驻 +敢 +债 +17 +浮 +幻 +株 +浅 +敬 +敏 +陷 +凤 +坛 +虎 +乌 +铜 +御 +乳 +讯 +循 +圈 +肌 +妙 +奋 +忘 +闭 +墓 +21 +汤 +忠 +2005 +跨 +怕 +振 +宾 +跑 +屏 +坦 +粗 +租 +悲 +伟 +拜 +24 +妻 +赞 +兄 +宿 +碑 +貌 +勒 +罚 +夺 +偶 +截 +纤 +2011 +齿 +郑 +聘 +偿 +扶 +豪 +慧 +跳 +the +疏 +莱 +腐 +插 +恐 +郎 +辞 +挂 +娘 +肿 +徒 +伏 +磁 +杯 +丛 +旨 +琴 +19 +炮 +醒 +砖 +替 +辛 +暖 +锁 +杜 +肠 +孤 +饭 +脸 +邮 +贷 +lt +俄 +毁 +荷 +谐 +荒 +肝 +链 +2004 +2012 +尺 +尘 +援 +a +疫 +崇 +恢 +扎 +伸 +幽 +抵 +胸 +谱 +舒 +迫 +200 +畅 +泡 +岭 +喷 +70 +窗 +捷 +宏 +肯 +90 +狂 +铺 +骑 +抽 +券 +俱 +徽 +胆 +碎 +邀 +褐 +斤 +涂 +赋 +署 +颗 +2003 +渠 +仿 +迪 +炉 +辉 +涵 +耗 +22 +返 +邻 +斑 +董 +魏 +午 +娱 +浴 +尿 +曼 +锅 +柳 +舰 +搭 +旁 +宅 +趋 +of +凉 +赢 +伙 +爷 +廷 +戴 +壤 +奶 +页 +玄 +驾 +阔 +轨 +朗 +捕 +肾 +稿 +惯 +侯 +乙 +渡 +稍 +恨 +脏 +2002 +姆 +腔 +抱 +杆 +垂 +赴 +赶 +莲 +辽 +荐 +旦 +妖 +2013 +稀 +驱 +沈 +役 +晓 +亭 +仲 +澳 +500 +炸 +绪 +28 +陕 +and +23 +恒 +堡 +纠 +仇 +懂 +焦 +搜 +s +忍 +贤 +添 +i +艾 +赤 +犹 +尝 +锦 +稻 +撰 +填 +衰 +栽 +邪 +粘 +跃 +桌 +胃 +悬 +c +翼 +彼 +睡 +曹 +刷 +摆 +悉 +锋 +26 +摇 +抢 +乏 +廉 +鼠 +盾 +瓷 +抑 +埃 +邦 +遂 +寸 +渔 +祥 +胎 +牵 +壳 +甜 +卓 +瓜 +袭 +遵 +巡 +逆 +玛 +韵 +2001 +桑 +酷 +赖 +桂 +郡 +肃 +仓 +寄 +塘 +瘤 +300 +碳 +搞 +燕 +蒸 +允 +忽 +斜 +穷 +郁 +囊 +奔 +昆 +盆 +愈 +递 +1000 +黎 +祭 +怒 +辈 +腺 +滚 +暂 +郭 +璃 +踪 +芳 +碍 +肺 +狱 +冒 +阁 +砂 +35 +苍 +揭 +踏 +颇 +柄 +闪 +孝 +葡 +腾 +茎 +鸣 +撤 +仰 +伐 +丘 +於 +泪 +荡 +扰 +纲 +拼 +欣 +纽 +癌 +堆 +27 +菲 +b +披 +挖 +寓 +履 +捐 +悟 +乾 +嘴 +钻 +拳 +吹 +柏 +遥 +抚 +忧 +赠 +霸 +艰 +淋 +猫 +帅 +奈 +寨 +滴 +鼻 +掘 +狗 +驶 +朴 +拆 +惜 +玻 +扣 +萄 +蔬 +宠 +2014 +缴 +赫 +凯 +滨 +乔 +腰 +葬 +孟 +吾 +枚 +圳 +忙 +扫 +杭 +凌 +1998 +梯 +丈 +隶 +1999 +剪 +盗 +擅 +疆 +弯 +携 +拒 +秒 +颁 +醇 +割 +浆 +姑 +爸 +螺 +穗 +缝 +慈 +喝 +瓶 +漏 +悠 +猎 +番 +孕 +伪 +漂 +腿 +吐 +坝 +滤 +函 +匀 +偷 +浩 +矛 +僧 +辨 +俊 +棉 +铸 +29 +诞 +丧 +夹 +to +姿 +睛 +淮 +阀 +姜 +45 +尸 +猛 +1997 +芽 +账 +旱 +醉 +弄 +坊 +烤 +萧 +矣 +雾 +倡 +榜 +弗 +氨 +朵 +锡 +袋 +拨 +湘 +岳 +烦 +肩 +熙 +炭 +婆 +棋 +禅 +穴 +宙 +汗 +艳 +儒 +叙 +晨 +颈 +峡 +拖 +烂 +茂 +戒 +飘 +氛 +蒂 +撞 +瓣 +箭 +叛 +1996 +31 +鞋 +劲 +祝 +娜 +饲 +侍 +诱 +叹 +卢 +弥 +32 +鼎 +厦 +屈 +慕 +魅 +m +厨 +嫁 +绵 +逼 +扮 +叔 +酶 +燥 +狼 +滋 +汁 +辐 +怨 +翅 +佩 +坑 +旬 +沃 +剩 +蛇 +颖 +篮 +锐 +侠 +匹 +唤 +熊 +漠 +迟 +敦 +雌 +谨 +婴 +浸 +磷 +筒 +2015 +滩 +埋 +框 +弘 +吕 +碰 +纺 +硫 +堪 +契 +蜜 +蓄 +1995 +阐 +apos +傲 +碱 +晰 +狭 +撑 +叉 +卧 +劫 +闹 +赐 +邓 +奴 +溉 +浦 +蹈 +辣 +遣 +耀 +耶 +翠 +t +叠 +迈 +霍 +碧 +恰 +脊 +昭 +摸 +饱 +赔 +泄 +哭 +讼 +逝 +逻 +廊 +擦 +渗 +彰 +you +卿 +旺 +宪 +36 +顷 +妆 +陪 +葛 +仔 +淀 +翰 +悦 +穆 +煮 +辩 +弦 +in +串 +押 +蚀 +逢 +贺 +焊 +煌 +缔 +惑 +鹿 +袁 +糊 +逸 +舟 +勃 +侦 +涯 +蔡 +辟 +涌 +枯 +痕 +疼 +莉 +柴 +1993 +眉 +1992 +罢 +催 +衔 +秉 +妃 +鸿 +傅 +400 +辰 +聪 +咸 +1994 +扇 +盈 +勘 +佐 +泊 +抛 +搬 +牢 +宴 +牲 +贾 +摘 +姻 +慎 +帕 +忌 +卒 +夕 +卜 +惟 +挺 +崖 +炒 +爵 +冻 +椒 +鳞 +祸 +潭 +腊 +蒋 +缠 +寂 +眠 +冯 +芯 +槽 +吊 +33 +150 +聊 +梗 +嫩 +凶 +铭 +爽 +筋 +韦 +脾 +铝 +肢 +栋 +勾 +萌 +渊 +掩 +狮 +撒 +漆 +骗 +禽 +38 +蕴 +坪 +洒 +冶 +兹 +椭 +喻 +泵 +哀 +翔 +1990 +棒 +芝 +x +扑 +3000 +毅 +衍 +惨 +疯 +欺 +贼 +肖 +轰 +巢 +臂 +轩 
+扁 +淘 +犬 +宰 +祠 +挡 +厌 +帐 +蜂 +狐 +垃 +昂 +圾 +秩 +芬 +瞬 +枢 +舌 +唇 +棕 +1984 +霞 +霜 +艇 +侨 +鹤 +硅 +靖 +哦 +削 +泌 +奠 +d +吏 +夷 +咖 +彭 +窑 +胁 +肪 +120 +贞 +劝 +钙 +柜 +鸭 +75 +庞 +兔 +荆 +丙 +纱 +34 +戈 +藤 +矩 +泳 +惧 +铃 +渴 +胀 +袖 +丸 +狠 +豫 +茫 +1985 +浇 +菩 +氯 +啡 +1988 +葱 +37 +梨 +霉 +脆 +氢 +巷 +丑 +娃 +锻 +愤 +贪 +蝶 +1991 +厉 +闽 +浑 +斩 +栖 +l +茅 +昏 +龟 +碗 +棚 +滞 +慰 +600 +2016 +斋 +虹 +屯 +萝 +饼 +窄 +潘 +绣 +丢 +芦 +鳍 +42 +裕 +誓 +腻 +48 +95 +锈 +吞 +蜀 +啦 +扭 +5000 +巩 +髓 +1987 +劣 +拌 +谊 +涛 +勋 +郊 +莎 +痴 +窝 +驰 +1986 +跌 +笼 +挤 +溢 +1989 +隙 +55 +鹰 +诏 +帽 +65 +芒 +爬 +凸 +牺 +熔 +吻 +竭 +瘦 +冥 +800 +搏 +屡 +昔 +萼 +愁 +捉 +翁 +怖 +汪 +烯 +疲 +缸 +溃 +85 +泼 +剖 +涨 +橡 +谜 +悔 +嫌 +盒 +苯 +凹 +绳 +畏 +罐 +虾 +柯 +邑 +馨 +兆 +帖 +陌 +禄 +垫 +壶 +逊 +骤 +祀 +晴 +蓬 +e +苞 +煎 +菊 +堤 +甫 +拱 +氮 +罕 +舶 +伞 +姚 +弓 +嵌 +1983 +1982 +馈 +琼 +噪 +雀 +呵 +汝 +焉 +陀 +胺 +惩 +沼 +枣 +桐 +酱 +遮 +孢 +钝 +呀 +锥 +妥 +酿 +巫 +闯 +沧 +崩 +蕊 +酬 +匠 +躲 +43 +喊 +98 +琳 +46 +绎 +喉 +凰 +抬 +93 +膨 +盲 +剥 +喂 +庸 +奸 +n +钩 +冈 +募 +苑 +杏 +杉 +辱 +隋 +薪 +绒 +1980 +99 +欠 +尉 +r +攀 +抹 +巾 +1958 +渣 +苹 +猴 +悄 +屠 +41 +颂 +湛 +魄 +颠 +1949 +呆 +粤 +岂 +娇 +暑 +44 +56 +52 +鹅 +筛 +膏 +樱 +p +缆 +襄 +瑟 +恭 +泻 +匪 +兮 +恼 +吟 +仕 +蔽 +骄 +蚕 +斥 +椅 +姬 +谦 +for +椎 +搅 +卸 +沫 +怜 +坎 +瑰 +1978 +钦 +h +拾 +厕 +後 +逾 +薯 +衬 +钾 +崔 +稽 +蛮 +殷 +晒 +47 +菇 +臭 +弧 +擎 +粹 +纬 +1500 +焰 +玲 +竣 +咒 +歇 +糕 +诵 +茨 +妮 +酯 +麟 +卑 +浏 +咽 +罩 +舱 +酵 +晕 +顽 +赁 +咬 +枫 +冀 +贮 +艘 +亏 +薛 +瀑 +篆 +膀 +沸 +雍 +咳 +尹 +愉 +烹 +坠 +勿 +钠 +64 +坤 +甸 +墅 +闸 +藻 +韧 +鄂 +58 +51 +91 +j +瑶 +舆 +夸 +54 +蕾 +栗 +咏 +丞 +抄 +鹏 +弊 +檐 +骂 +仆 +峻 +爪 +赚 +帆 +娶 +嘛 +钓 +澄 +猜 +1979 +裔 +抒 +铅 +卉 +彦 +f +删 +衷 +禹 +寡 +蒲 +砌 +on +棱 +72 +拘 +堵 +雁 +仄 +荫 +53 +k +1981 +祈 +49 +奢 +赌 +寇 +3d +隧 +摊 +雇 +卦 +婉 +敲 +挣 +皱 +虞 +亨 +懈 +挽 +珊 +饶 +滥 +锯 +闷 +it +酮 +虐 +兑 +僵 +傻 +62 +沦 +巅 +鞭 +梳 +赣 +锌 +庐 +薇 +庵 +57 +96 +慨 +肚 +妄 +g +仗 +绑 +2017 +枕 +牡 +000 +胖 +沪 +垒 +捞 +捧 +竖 +蜡 +桩 +厢 +孵 +黏 +拯 +63 +谭 +68 +诈 +灿 +釉 +1956 +裹 +钮 +俩 +o +灶 +彝 +蟹 +涩 +醋 +110 +匙 +歧 +刹 +玫 +棘 +橙 +凑 +桶 +刃 +伽 +4000 +硝 +怡 +籽 +敞 +淳 +矮 +镶 +戚 +幢 +涡 +66 +尧 +膝 +is +哉 +肆 +畔 +溯 +97 +媚 +烘 +01 +67 +窃 +焚 +澜 +愚 +棵 +乞 +86 +78 +佑 +76 +iphone +暨 +敷 +饥 +俯 +蔓 +v +05 +88 +暮 +砍 +邵 +仑 +毗 +剿 +馀 +180 +锤 +刮 +1950 +梭 +摧 +250 +掠 +躯 +诡 +匈 +侣 +胚 +疮 +59 +裙 +windows +裸 +08 +塌 +吓 +俘 +糙 +藩 +楷 +羞 +with +鲍 +帘 +裤 +宛 +憾 +桓 +痰 +寞 +骚 +惹 +笋 +萃 +92 +栓 +61 +挫 +矢 +垦 +09 +垄 +绸 +凄 +your +镀 +熏 +钉 +1945 +led +粪 +缅 +洽 +鞘 +蔗 +82 +迄 +沐 +凿 +勉 +昨 +喘 +700 +爹 +屑 +耻 +沥 +庶 +涅 +腕 +袍 +懒 +阜 +嗜 +朔 +1200 +蒜 +沛 +坟 +轿 +喀 +笛 +狄 +饿 +蓉 +泣 +窟 +130 +豹 +屿 +73 +崛 +迦 +诠 +贬 +腥 +83 +钥 +嗣 +瑜 +07 +倦 +萎 +拦 +冤 +讽 +潇 +谣 +趁 +1960 +妨 +84 +贩 +74 +萍 +窦 +纂 +缀 +矫 +淑 +墩 +梵 +沾 +淫 +乖 +汰 +莞 +81 +旷 +浊 +挚 +撼 +69 +87 +氟 +焕 +06 +庚 +掀 +诀 +kg +盼 +71 +疹 +窖 +匆 +厥 +轧 +89 +淹 +94 +160 +亥 +鸦 +棍 +谅 +歼 +汕 +挪 +蚁 +敛 +魁 +畴 +炫 +丫 +奎 +菱 +沂 +撕 +阎 +詹 +03 +蛛 +77 +靡 +瞻 +咱 +愧 +烷 +畸 +灸 +眸 +that +觅 +芜 +1955 +廓 +斌 +躁 +麓 +摔 +1970 +烛 +睹 +孜 +缚 +堕 +昼 +睿 +琪 +琉 +贱 +6000 +渝 +跋 +1959 +茄 +1957 +舜 +1976 +诛 +1952 +捣 +芙 +04 +1961 +倚 +1938 +酰 +澈 +慌 +帜 +颤 +陇 +1962 +02 +颌 +昧 +佣 +眷 +徙 +禾 +逮 +1948 +79 +莹 +碟 +梢 +朽 +粥 +喇 +1964 +榆 +驳 +楔 +1965 +啸 +肋 +dna +踢 +1975 +1937 +u +傍 +桔 +肴 +呕 +旭 +埠 +贿 +曝 +杖 +俭 +栩 +1953 +斧 +镁 +匾 +踩 +橘 +颅 +1963 +囚 +蛙 +1946 +膳 +坞 +琐 +荧 +瘟 +涤 +胰 +衫 +噬 +皖 +邱 +埔 +汀 +羡 +睐 +葵 +耿 +糟 +厄 +秧 +黔 +蹄 +140 +漳 +鞍 +谏 +腋 +簇 +梧 +戎 +1977 +榴 +诣 +宦 +苔 +揽 +簧 +狸 +阙 +扯 +耍 +棠 +脓 +烫 +翘 +芭 +躺 +羁 +藉 +拐 +1966 +陡 +1954 +漓 +棺 +钧 +琅 +扔 +寝 +绚 +熬 +驿 +邹 +杠 +1972 +w +绥 +窥 +晃 +渭 +1947 +樊 +鑫 +祁 +陋 +哺 +堰 +祛 +y +梓 +崎 +1968 +孽 +蝴 +蔚 +抖 +苟 +肇 +溜 +绅 +妾 +1940 +跪 +沁 +q +1973 +莽 +虏 +be +瞄 +砸 +稚 +僚 +崭 +迭 +皂 +彬 +雏 +ip +羲 +缕 +绞 +俞 +簿 +耸 +廖 +嘲 +can +1969 +翌 +榄 +裴 +槐 +1939 +洼 +睁 +1951 +灼 +啤 +臀 +啥 +濒 +醛 +峨 +葫 +悍 +笨 +嘱 +1935 +稠 +360 +韶 +1941 +陛 +峭 +1974 +酚 +翩 +舅 +8000 +寅 +1936 +蕉 +阮 +垣 +戮 +me +趾 +犀 +巍 +re +霄 +1942 +1930 +饪 +sci +秆 +朕 +驼 +肛 +揉 +ipad +楠 +岚 +疡 +帧 +柑 +iso9001 +赎 +逍 +滇 +璋 +礁 +黛 +钞 +邢 +涧 +劈 +瞳 +砚 +驴 +1944 +锣 +恳 +栅 +吵 +牟 +沌 +瞩 +咪 +毯 +炳 +淤 +盯 +芋 
+粟 +350 +栈 +戊 +盏 +峪 +拂 +暇 +酥 +汛 +900 +pc +嚣 +2500 +轼 +妒 +匿 +1934 +鸽 +蝉 +cd +痒 +宵 +瘫 +1927 +1943 +璧 +汲 +1971 +冢 +碌 +琢 +磅 +卤 +105 +剔 +谎 +圩 +酌 +捏 +渺 +媳 +1933 +穹 +谥 +骏 +哨 +骆 +乒 +10000 +摹 +兜 +柿 +喧 +呜 +捡 +橄 +逗 +瑚 +呐 +檀 +辜 +妊 +祯 +1931 +苷 +don +衙 +笃 +芸 +霖 +荔 +闺 +羌 +芹 +dvd +哼 +糯 +吼 +蕃 +嵩 +矶 +绽 +坯 +娠 +1928 +祷 +锰 +qq +by +瘀 +108 +岐 +1932 +茵 +筝 +斐 +肽 +歉 +1929 +嗽 +恤 +汶 +聂 +樟 +擒 +鹃 +拙 +鲤 +絮 +鄙 +彪 +ipod +z +嗓 +墟 +骼 +渤 +僻 +豁 +谕 +荟 +姨 +婷 +挠 +哇 +炙 +220 +诅 +娥 +哑 +阱 +嫉 +圭 +乓 +橱 +歪 +禧 +甩 +坷 +晏 +驯 +讳 +泗 +煞 +my +淄 +倪 +妓 +窍 +竿 +襟 +匡 +钛 +侈 +ll +侄 +铲 +哮 +厩 +1967 +亢 +101 +辕 +瘾 +辊 +狩 +掷 +潍 +240 +伺 +嘿 +弈 +嘎 +陨 +娅 +1800 +昊 +犁 +屁 +蜘 +170 +寥 +滕 +毙 +as +涝 +谛 +all +郝 +痹 +溺 +汾 +脐 +馅 +蠢 +珀 +腌 +扼 +敕 +莓 +峦 +铬 +谍 +炬 +龚 +麒 +睦 +磺 +吁 +掺 +烁 +靶 +or +圃 +饵 +褶 +娟 +滔 +挨 +android +褒 +胱 +cpu +晖 +脖 +垢 +抉 +冉 +茧 +from +渲 +癫 +125 +de +悼 +嫂 +瞒 +纶 +肘 +炖 +瀚 +皋 +姊 +颐 +1600 +俏 +颊 +gps +讶 +札 +奕 +磊 +镖 +遐 +眺 +腑 +boss +琦 +蚊 +窜 +渍 +嗯 +102 +1926 +touch +夯 +1300 +笙 +蘑 +翡 +碘 +卯 +啼 +靓 +辍 +莺 +躬 +猿 +杞 +眩 +虔 +凋 +遁 +泾 +岔 +羟 +弛 +娄 +茸 +皓 +峙 +逅 +邂 +苇 +楹 +蹲 +拢 +甄 +鳃 +104 +邯 +捆 +勺 +450 +酉 +荚 +唑 +臻 +辗 +绰 +徊 +榨 +苛 +赦 +盔 +壬 +恍 +缉 +2020 +熨 +7000 +澡 +桨 +匣 +兢 +106 +驭 +x1 +镍 +孰 +绮 +馏 +蝇 +佼 +鲸 +128 +哎 +裳 +蜕 +嚼 +嘻 +web +庇 +绢 +倩 +钵 +ii +恪 +帷 +莆 +柠 +藕 +砾 +115 +绊 +喙 +坂 +徘 +荀 +瞧 +蛾 +1925 +晦 +ph +mm +铎 +107 +紊 +锚 +酪 +稷 +聋 +闵 +熹 +冕 +诫 +珑 +曦 +篷 +320 +迥 +蘖 +胤 +103 +檬 +瑾 +钳 +遏 +辄 +嬉 +隅 +ps +秃 +112 +帛 +聆 +芥 +诬 +1100 +挟 +宕 +2018 +鹊 +琶 +膛 +mv +兀 +gb +懿 +碾 +叮 +863 +蠕 +譬 +缮 +烽 +妍 +榕 +260 +1920 +邃 +焙 +倘 +210 +戌 +茹 +豚 +晾 +浒 +玺 +醚 +祐 +炽 +this +缪 +凛 +噩 +溅 +毋 +槛 +ei +are +嫡 +蝠 +娴 +稣 +禀 +壑 +殆 +敖 +cm +ios +倭 +挛 +侃 +蚌 +咀 +盎 +殉 +岑 +浚 +谬 +狡 +1924 +癸 +280 +逛 +耽 +俺 +璨 +巳 +茜 +郸 +蒴 +琵 +we +230 +叩 +泸 +塾 +one +稼 +reg +侮 +锂 +曙 +3500 +up +薰 +婿 +惶 +拭 +篱 +恬 +淌 +烙 +袜 +徵 +慷 +夭 +噶 +莘 +135 +鸳 +殡 +蚂 +1900 +憎 +喃 +佚 +龛 +潢 +烃 +at +岱 +潺 +109 +衢 +璀 +5cm +1400 +鹭 +揣 +痢 +know +厮 +氓 +怠 +no +nbsp +痘 +硒 +镌 +乍 +咯 +惬 +not +桦 +骇 +枉 +蜗 +睾 +淇 +耘 +娓 +弼 +鳌 +嗅 +gdp +狙 +箫 +朦 +椰 +胥 +丐 +陂 +唾 +鳄 +柚 +谒 +journal +戍 +1912 +刁 +鸾 +缭 +骸 +铣 +酋 +蝎 +掏 +耦 +怯 +娲 +拇 +汹 +胧 +疤 +118 +硼 +恕 +哗 +眶 +痫 +凳 +鲨 +擢 +歹 +樵 +瘠 +app +茗 +翟 +黯 +蜒 +壹 +殇 +伶 +辙 +an +瑕 +町 +孚 +痉 +铵 +搁 +漾 +戟 +镰 +鸯 +猩 +190 +蔷 +缤 +叭 +垩 +113 +曳 +usb +奚 +毓 +ibm +颓 +汐 +靴 +china +傣 +尬 +濮 +赂 +媛 +懦 +扦 +111 +韬 +like +戳 +java +雯 +114 +蜿 +116 +1923 +笺 +裘 +尴 +侗 +mba +3g +钨 +1919 +苓 +1922 +寰 +蛊 +扳 +搓 +涟 +睫 +淬 +5mm +123 +ve +121 +赈 +恺 +瞎 +蝙 +1921 +枸 +萱 +颚 +憩 +秽 +秸 +拷 +阑 +貂 +粱 +煲 +隘 +暧 +惕 +沽 +time +菠 +1911 +趟 +磋 +偕 +涕 +邸 +so +踞 +惫 +122 +阪 +鞠 +饺 +汞 +颍 +氰 +屹 +蛟 +跻 +哟 +have +126 +臼 +熄 +绛 +弩 +褪 +117 +渎 +亟 +匮 +撇 +internet +霆 +攒 +舵 +扛 +彤 +nba +蛤 +婢 +偃 +胫 +姥 +睑 +love +iso +pk +诙 +what +诲 +锭 +悚 +扒 +洱 +劾 +惰 +篡 +瓯 +徇 +铀 +骋 +flash +1918 +out +筷 +渚 +踵 +俨 +ceo +榻 +糜 +捻 +釜 +哩 +萤 +270 +蛹 +隽 +垮 +鸠 +鸥 +漕 +瑙 +礴 +憧 +殴 +潼 +悯 +砺 +拽 +钗 +ct +酣 +镂 +mp3 +膺 +楞 +竺 +迂 +嫣 +忱 +cad +哄 +疣 +鹦 +1700 +枭 +憬 +疱 +will +婪 +沮 +1914 +怅 +119 +筱 +扉 +瞰 +linux +旌 +蔑 +铠 +瀛 +vip +琥 +750 +127 +懵 +谴 +捍 +蟾 +漩 +1913 +拣 +汴 +university +刨 +叱 +曜 +妞 +澎 +镑 +翎 +瞪 +sh +倔 +芍 +璞 +瓮 +驹 +芷 +寐 +擂 +丕 +蟠 +诃 +悸 +亘 +溴 +宸 +廿 +恃 +棣 +1917 +荼 +筠 +羚 +慑 +唉 +纣 +麼 +蹦 +锄 +145 +international +124 +淆 +甙 +132 +蚜 +椿 +禺 +绯 +冗 +168 +葩 +厝 +媲 +蒿 +痪 +650 +菁 +炊 +wifi +俑 +new +讥 +min +桀 +祺 +129 +吡 +迩 +do +john +箔 +皿 +缎 +萦 +剃 +霓 +酝 +mg +诰 +茉 +just +get +飙 +湍 +蜥 +箕 +蘸 +550 +4500 +柬 +韭 +溥 +but +熠 +鹉 +咐 +剌 +138 +悖 +瞿 +槟 +娩 +闾 +pvc +遴 +咫 +20000 +孺 +彷 +茬 +211 +蓟 +li +if +憨 +袅 +佬 +炯 +erp +1910 +啶 +昙 +蚩 +136 +痔 +蕨 +瓢 +夔 +毡 +赃 +鳖 +沅 +wang +go +饷 +165 +臧 +掖 +褚 +羹 +ic +勐 +tv +谚 +畦 +眨 +贻 +攸 +涎 +弑 +咎 +铂 +瑛 +1905 +矗 +虱 +more +133 +秤 +谟 +漱 +俸 +夙 +1915 +br +game +雉 +螨 +恣 +斛 +175 +谙 +隍 +131 +奄 +480 +yy +1916 +壕 +髻 +155 +鄱 +嘶 +磕 +濡 +赘 +荞 +讹 +猕 +痞 +鬓 +铮 +腱 +幡 +榭 +爻 +5m +涓 +晤 +咕 +惭 +钼 +匕 +ok +撮 +庾 +笠 +窘 
+癖 +365 +垛 +窒 +畲 +甬 +彗 +缨 +湮 +寮 +et +衅 +谪 +156 +绫 +9000 +152 +兖 +疽 +磐 +380 +菏 +沱 +骁 +嫔 +盂 +娆 +钊 +蟒 +忏 +谤 +148 +137 +server +2200 +晟 +ng +15000 +google +痈 +耆 +谧 +簪 +134 +ml +疟 +扈 +脍 +琛 +咋 +胄 +142 +144 +葆 +轶 +桢 +973 +攘 +was +邕 +拧 +茯 +205 +摒 +1908 +intel +傀 +祚 +嘟 +帼 +1906 +wto +筵 +when +馒 +疚 +璇 +砧 +merge +槃 +microsoft +犷 +exe +腓 +煜 +弋 +疸 +濑 +310 +201 +麝 +嗟 +忻 +愣 +facebook +斓 +吝 +咧 +矾 +愫 +151 +158 +漪 +珂 +rna +逞 +146 +206 +糠 +璐 +藓 +昕 +妩 +屌 +疵 +excel +嘘 +he +plc +袂 +2400 +139 +稃 +剁 +侏 +掐 +猾 +匍 +2800 +坳 +黜 +邺 +闫 +猥 +湃 +斟 +癣 +1904 +185 +匐 +粳 +sql +330 +141 +cp +1909 +叟 +俾 +儡 +莒 +12000 +骥 +跤 +耙 +矜 +翱 +zhang +ms +赡 +1907 +浣 +栾 +拈 +science +420 +螟 +aaa +桧 +坍 +睢 +趴 +id +伎 +2100 +婺 +霹 +痊 +膊 +眯 +豌 +202 +驮 +骈 +850 +iii +嶂 +淞 +143 +腮 +髅 +炀 +啄 +亳 +麾 +147 +筐 +叨 +徨 +跷 +ac +楂 +郴 +绶 +hp +羔 +xp +ieee +咤 +now +there +靳 +they +屎 +雳 +瘘 +蹬 +2300 +惮 +acid +涪 +阖 +煽 +蹊 +225 +栉 +153 +俟 +涸 +辫 +锢 +佟 +176 +皎 +cctv +啮 +钰 +螂 +dc +啪 +绷 +204 +闰 +畿 +2d +覃 +2600 +惘 +贰 +154 +碉 +卞 +酐 +枷 +葺 +芪 +207 +蕙 +192 +咚 +籁 +pro +钴 +162 +冽 +玮 +骷 +啃 +焖 +猝 +榈 +滁 +拮 +跗 +讷 +蝗 +208 +蠡 +world +烨 +been +hd +gmp +256 +脯 +歙 +泠 +刍 +掳 +pe +his +僳 +340 +1902 +螯 +胳 +髦 +粽 +戾 +祜 +178 +186 +岷 +懋 +馥 +昵 +踊 +湄 +郢 +斡 +迢 +ce +photoshop +嗪 +about +裨 +1903 +羧 +膈 +翊 +lcd +鲫 +163 +螃 +沓 +疝 +笈 +ktv +榔 +157 +诘 +autocad +195 +颉 +蛀 +鸢 +焯 +囧 +make +梆 +npc +潞 +戛 +see +system +149 +佗 +艮 +chinese +let +霾 +鬟 +215 +net +玖 +1898 +腭 +喔 +172 +罔 +佥 +粑 +visual +舷 +泯 +m2 +198 +has +203 +sd +泓 +炜 +谗 +烬 +跆 +rpg +傩 +飓 +浔 +钤 +惚 +胭 +踝 +镯 +ep +221 +臆 +196 +蜚 +揪 +觞 +皈 +dj +183 +api +迸 +匝 +筏 +167 +醴 +黍 +洮 +滦 +侬 +甾 +290 +way +3200 +188 +diy +2cm +com +澧 +阈 +袱 +迤 +衮 +166 +濂 +娑 +砥 +砷 +铨 +缜 +箴 +30000 +逵 +猖 +159 +蛰 +箍 +侥 +2mm +搂 +纨 +裱 +枋 +嫦 +敝 +挝 +贲 +潦 +235 +撩 +惺 +铰 +f1 +忒 +咆 +哆 +莅 +164 +炕 +抨 +涿 +龈 +猷 +got +b1 +182 +2m +212 +遒 +缥 +vs +捂 +俐 +la +瘙 +搐 +牍 +isbn +馍 +our +痿 +袤 +峥 +184 +栎 +罹 +燎 +喵 +209 +1901 +璜 +飒 +蔼 +珞 +澹 +奘 +岖 +芡 +簸 +杵 +甥 +骊 +216 +悴 +173 +惆 +5mg +殃 +1895 +呃 +161 +5g +祗 +3600 +髋 +169 +liu +who +幔 +down +榛 +犊 +霁 +芮 +520 +牒 +佰 +her +狈 +薨 +co +吩 +鳝 +嵘 +濠 +呤 +纫 +3mm +檄 +214 +浜 +370 +189 +缙 +缢 +煦 +蓦 +揖 +拴 +缈 +218 +褥 +铿 +312 +燮 +life +锵 +174 +荥 +187 +忿 +4s +僖 +婶 +171 +chen +芾 +镐 +痣 +research +眈 +460 +祇 +邈 +翳 +碣 +遨 +鳗 +诂 +never +岫 +焘 +3cm +co2 +茱 +tcp +only +255 +gsm +say +洵 +晁 +right +噢 +she +over +偈 +旖 +david +181 +232 +蚓 +柘 +珐 +遽 +岌 +桅 +213 +唔 +222 +鄞 +雹 +michael +驸 +苻 +恻 +鬃 +玑 +磬 +崂 +304 +祉 +荤 +淼 +560 +264 +肱 +呗 +pp +b2 +骡 +囱 +10cm +佞 +back +1890 +226 +耒 +伫 +嚷 +粼 +aa +歆 +佃 +旎 +惋 +殁 +杳 +their +阡 +red +畈 +蔺 +os +177 +map +巽 +cbd +昱 +啰 +吠 +179 +199 +嗔 +涮 +238 +奂 +1896 +撷 +301 +袒 +720 +爰 +捶 +赭 +蜓 +姗 +蔻 +垠 +193 +gis +噻 +ab +峒 +皙 +want +245 +憔 +帚 +office +xx +杷 +蟆 +iso14001 +觐 +钒 +岙 +2700 +1899 +栀 +幄 +啧 +癜 +擀 +轲 +铆 +them +讴 +樽 +霏 +mtv +肮 +枳 +骞 +诧 +瘢 +虬 +拗 +play +219 +蕲 +316 +茁 +唆 +technology +word +沭 +毂 +蛎 +芊 +銮 +瞥 +呱 +223 +羿 +吒 +傥 +髯 +濯 +蜻 +皴 +802 +430 +邳 +燧 +1860 +獭 +垭 +祟 +217 +虢 +how +枇 +abs +鹫 +194 +颞 +1894 +333 +皑 +脲 +197 +舔 +魇 +霭 +org +坨 +郧 +baby +椽 +舫 +228 +oh +305 +荠 +琊 +溟 +1897 +煨 +265 +谯 +粲 +罂 +gonna +屉 +佯 +郦 +亵 +诽 +芩 +嵇 +蚤 +哒 +315 +啬 +ain +嚎 +玥 +twitter +191 +隼 +唢 +铛 +cause +壅 +藜 +won +吱 +rom +楣 +璟 +锆 +憋 +罡 +al +咙 +1850 +腈 +oslash +job +233 +廪 +堑 +into +诩 +b2c +溧 +鹑 +讫 +哌 +铢 +蜴 +1ml +稹 +噜 +镉 +224 +愕 +桁 +晔 +琰 +陲 +疙 +667 +崮 +need +540 +8mm +html +颛 +through +asp +桡 +钜 +580 +take +谑 +仞 +咦 +珪 +揍 +鱿 +阉 +3800 +瘩 +410 +槌 +滓 +茴 +tft +泮 +涣 +atm +pci +柞 +渥 +飨 +孪 +沔 +谲 +桉 +vcd +慵 +318 +oem +other +俚 +paul +跖 +纭 +恙 +which +fi +佘 +236 +荃 +咄 +鞅 +叁 +james +恽 +m3 +253 +炔 +萘 +钺 +6500 +1880 +ccd +楫 +塬 +钡 +琮 +苄 +950 +325 +275 +1g +day +o2o +960 +music +骰 +偎 +粕 +amd +咔 +鹄 +瓒 +阆 +捅 +嬴 +adobe +箨 +name 
+390 +680 +640 +氦 +倜 +b2b +觊 +xml +婕 +229 +jar +锑 +撬 +chem +掰 +嗷 +5500 +1cm +饯 +蓓 +234 +good +鼬 +spa +佤 +5a +ss +蚯 +挞 +臾 +where +atp +227 +嶙 +幂 +饬 +闱 +live +high +煅 +嘧 +1mm +蹭 +sun +abc +瞭 +顼 +箐 +here +徉 +231 +骜 +302 +嗨 +邛 +庑 +柩 +饕 +俎 +4mm +15g +嘌 +50000 +颏 +cssci +椁 +崧 +锉 +籼 +1870 +狞 +弁 +6mm +羯 +踹 +糅 +248 +1840 +砼 +263 +嫖 +tmp +252 +mac +285 +豉 +啉 +榷 +嘈 +en +俪 +痂 +308 +inf +630 +儋 +4a +芎 +ai +man +繇 +1889 +bt +239 +meta +蹇 +242 +530 +诋 +bbc +煸 +峋 +淙 +324 +management +1885 +泱 +徜 +crm +4cm +free +汩 +纥 +246 +蝼 +囿 +uv +暹 +谆 +蹂 +鞣 +3c +mr +螳 +cs +馗 +幺 +鞑 +贽 +268 +istp +243 +漯 +237 +牦 +淖 +engineering +dr +囤 +than +gprs +sp +440 +晗 +1888 +258 +忡 +懊 +呋 +埂 +pcb +307 +first +321 +robert +鲈 +sup2 +阕 +3m +幌 +cg +303 +鳅 +勰 +find +8cm +萸 +剽 +蚝 +wi +绔 +pdf +1250 +262 +php +辇 +10mg +use +ie +麋 +1884 +陟 +宥 +oracle +锺 +喽 +620 +1892 +1893 +淅 +熵 +荨 +247 +忤 +american +266 +seo +轭 +嗦 +荪 +also +骠 +鹘 +p2p +4g +聿 +绾 +诶 +985 +怆 +244 +喋 +恸 +湟 +睨 +翦 +fe +蜈 +1875 +褂 +娼 +1886 +羸 +觎 +470 +瘁 +306 +蚣 +呻 +241 +1882 +昶 +谶 +猬 +荻 +school +286 +酗 +unit +肄 +躏 +膑 +288 +2g +嗡 +273 +iv +cam +510 +庠 +崽 +254 +搪 +pcr +胯 +309 +铉 +峤 +郯 +藐 +舂 +come +蓼 +some +薏 +窿 +羣 +氽 +徕 +冼 +rs +阂 +欤 +殒 +窈 +脘 +780 +篝 +yang +1861 +3300 +iso9000 +麸 +砭 +max +砰 +骶 +豺 +lg +窠 +獒 +think +腴 +苕 +any +its +缇 +骅 +劭 +college +卅 +ups +揆 +垅 +na +6cm +琏 +镗 +苜 +胛 +1881 +black +珏 +吮 +抠 +搔 +276 +rock +251 +槎 +4200 +323 +掣 +pet +1887 +ap +琨 +餮 +375 +舛 +give +si +痤 +us +311 +278 +埭 +english +peter +1891 +820 +胪 +喹 +妲 +婀 +帙 +10g +oa +7500 +箩 +灏 +霎 +logo +袄 +dsp +bl +镭 +蓿 +power +long +墉 +too +嵊 +1862 +girl +堇 +king +蟋 +610 +叽 +249 +钎 +30cm +fm +録 +group +1883 +郓 +瘴 +vol +丶 +呦 +邬 +頫 +272 +馁 +hiv +鄢 +257 +1876 +ordm +蛭 +322 +愍 +锲 +槿 +珈 +best +4800 +mri +1080 +fda +10mm +261 +nt +660 +super +1m +center +ui +335 +蜃 +298 +拎 +鎏 +裟 +沏 +np +螭 +7mm +觑 +墒 +捺 +轸 +micro +榫 +based +319 +怔 +ram +618 +昀 +even +泷 +1864 +ca +凫 +唠 +狰 +鲛 +氐 +呛 +绀 +碛 +茏 +盅 +蟀 +洙 +off +訇 +蠹 +auml +dos +20cm +267 +棂 +18000 +蚴 +篾 +two +靛 +暄 +show +1868 +泞 +cdma +mark +vc +洄 +赓 +麽 +25000 +篓 +孑 +860 +烩 +980 +design +颢 +钣 +var +髂 +蹴 +wanna +筮 +蝌 +醮 +home +菖 +fun +cmos +獗 +friends +business +岘 +570 +鼐 +1865 +姣 +national +1874 +蟑 +袈 +葶 +掬 +most +vga +emba +躇 +30g +鹌 +city +踌 +282 +钹 +蚪 +颧 +001 +13000 +鹳 +274 +km +345 +1050 +stop +328 +then +鲲 +驷 +潴 +295 +386 +焱 +稔 +悌 +mpeg +st +suv +vista +a1 +vi +283 +help +basic +唏 +11000 +苒 +蹙 +house +heart +ouml +281 +氩 +bug +mobile +宓 +service +dll +綦 +苎 +application +疃 +methyl +攫 +rfid +100g +287 +掾 +1871 +徭 +490 +舀 +逶 +嗤 +760 +0m +ge +1872 +people +hr +蜷 +茔 +512 +疳 +迳 +罄 +瓠 +100mg +讪 +psp +av +傈 +ppp +杲 +灞 +氲 +鬲 +獠 +柒 +骧 +1848 +away +william +326 +搀 +珩 +绦 +1879 +嚏 +710 +镛 +喱 +倏 +馋 +茭 +擘 +斫 +284 +1mg +怂 +hdmi +唧 +犍 +谩 +赊 +317 +271 +wu +鬻 +禛 +15cm +259 +840 +feel +485 +圻 +10m +蹶 +5kg +1877 +1873 +缄 +瘿 +黠 +甑 +矸 +嘀 +il +蹼 +jack +lee +269 +叼 +di +313 +旻 +auc +502 +1350 +鹜 +289 +fc +稗 +336 +999 +association +many +293 +雒 +george +td +赉 +style +馔 +颦 +ul +ld50 +1867 +颔 +掇 +1863 +each +赅 +桎 +inc +痧 +dv +谄 +孛 +笆 +鲶 +铳 +3100 +mc +tell +4m +blue +327 +299 +bios +龋 +385 +盱 +笏 +2030 +窕 +苴 +314 +big +1866 +296 +萋 +355 +辘 +琬 +cu +梏 +much +蚧 +3400 +1280 +镳 +24h +own +670 +studio +瞅 +keep +6g +ppt +conference +around +information +睬 +1878 +class +偌 +鲵 +惦 +1830 +蜍 +mp4 +why +靼 +1851 +332 +阗 +菟 +黝 +1650 +control +挈 +嵴 +剡 +358 +楸 +dha +氤 +m1 +vr +呎 +珲 +5ml +馄 +滂 +338 +蹉 +蓑 +锷 +297 +279 +啜 +1644 +sm +婵 +well +鬣 +7cm +钿 +bbs +晌 +蛆 +隗 +酞 +枞 +352 +work +always +9g +戬 +獾 +镕 +star +easy +饨 +娣 +缰 +邾 +334 +8m +ni +鹗 +277 +425 +end +had +嗒 +苋 +薮 +棹 +type +richard +880 +6m +拄 +air +埕 +勖 +鹞 +殚 +鲢 +pop +a4 
+1750 +ftp +16000 +啖 +ad +沣 +501 +靥 +葭 +诿 +htc +鸪 +007 +饴 +t1 +疖 +抟 +睽 +770 +access +tcl +稞 +吋 +谀 +澍 +杈 +妤 +sata +part +峄 +systems +漉 +40000 +ever +気 +368 +咲 +qs +ta +璘 +ltd +mol +media +萜 +僭 +朐 +742 +1855 +cc +圜 +癞 +藿 +555 +珉 +isp +set +1450 +陉 +him +僮 +292 +膻 +1853 +薹 +810 +汊 +still +锗 +昉 +pvp +猗 +http +1859 +3700 +strong +3a +锶 +real +跛 +art +1869 +331 +1368 +嘹 +337 +瓤 +402 +衄 +1856 +1820 +1150 +matlab +豕 +吆 +腆 +thomas +a2 +294 +le +366 +using +356 +bb +喆 +smith +different +莴 +401 +谌 +ci +珙 +疥 +kw +鲑 +405 +玷 +蛔 +砀 +361 +zh +nasa +materials +329 +nature +1h +谔 +睥 +ch +20mg +2mg +du +mail +data +every +蹑 +诒 +逋 +372 +while +姝 +刈 +婧 +going +喳 +镞 +铌 +291 +712 +辎 +鹧 +檩 +740 +扪 +10ml +霰 +ar +裆 +ol +嬷 +0mm +ufo +charles +20mm +tvb +apple +刎 +iec +project +sbs +嵋 +342 +690 +悱 +920 +嘤 +jean +篁 +荸 +瞑 +殓 +搽 +50mg +343 +橇 +include +eva +雎 +弭 +獐 +haccp +恿 +video +cf +vpn +society +眦 +730 +铐 +song +尕 +捎 +诟 +institute +痨 +cn +369 +笞 +756 +version +des +sns +趺 +590 +award +唬 +苣 +css +lte +xu +fbi +啾 +瘪 +垸 +357 +橹 +after +濛 +曷 +level +樾 +very +汨 +仟 +姒 +1858 +again +怦 +荏 +tom +诤 +苡 +吭 +830 +dm +before +406 +崆 +氡 +young +脩 +lan +胝 +钏 +3ds +cr +arm +pos +night +屐 +395 +忐 +彧 +拚 +鏖 +344 +100ml +525 +孳 +1024 +yu +忑 +384 +邝 +穰 +403 +摈 +庖 +351 +鸵 +398 +hello +矽 +354 +鲟 +said +381 +768 +発 +762 +sap +1854 +msn +菅 +book +353 +true +339 +javascript +348 +2900 +圪 +蹋 +衾 +簋 +璎 +367 +噎 +911 +嬗 +346 +肼 +362 +359 +跎 +滟 +little +4300 +701 +戦 +嵬 +look +仝 +phys +club +惇 +纾 +times +14000 +炁 +382 +xyz +number +ak +mind +huang +闳 +骐 +秣 +眙 +谘 +碓 +iso9002 +疔 +412 +恂 +am +top +master +鳕 +green +鸱 +int +爨 +镊 +404 +were +4600 +em +better +钯 +圮 +楽 +堀 +1852 +408 +sat +1857 +378 +422 +膘 +705 +噗 +347 +start +486 +锹 +505 +杼 +酊 +same +376 +white +挎 +箸 +郗 +垌 +sa +溏 +martin +蔫 +偻 +364 +妫 +飚 +625 +601 +辔 +濬 +666 +ds +瑄 +621 +觚 +5600 +nhk +415 +express +铍 +bit +跚 +9mm +翕 +煊 +these +50mm +gpu +b6 +hip +耄 +铋 +篦 +zhou +阇 +骛 +nvidia +莪 +吲 +youtube +唁 +870 +箧 +503 +tm +8500 +really +珅 +潋 +迨 +哽 +without +砦 +model +缗 +hey +謇 +呸 +mrna +垓 +糍 +park +wap +璠 +妣 +狎 +攥 +396 +闇 +york +蛉 +瑁 +joe +腼 +蹒 +great +review +200mg +chris +www +嶷 +online +莠 +沤 +哚 +475 +遑 +v1 +such +跺 +膦 +蹿 +unix +hard +40cm +50cm +nothing +郫 +zhao +玳 +ma +boy +埚 +url +432 +network +aaaa +衿 +371 +try +醪 +full +挹 +raid +bg +绡 +汜 +digital +mb +c1 +坩 +ccc +旃 +5200 +607 +itunes +powerpoint +鸨 +between +407 +翈 +1842 +1844 +435 +838 +抡 +chemistry +team +party +die +晞 +place +care +盥 +藁 +蓖 +383 +cv +臊 +made +state +465 +羰 +388 +1620 +sas +楝 +噱 +ji +饽 +苌 +soho +褓 +佶 +mp +581 +years +1260 +1680 +hop +稜 +瞠 +仡 +25mm +605 +423 +341 +363 +374 +627 +text +development +518 +伉 +襁 +ug +change +713 +涞 +1849 +蜇 +抿 +瑗 +pda +418 +un +line +958 +孱 +懑 +416 +von +373 +淦 +赝 +core +dns +747 +427 +387 +would +ipo +醌 +551 +缫 +蠲 +alt +嚓 +鲷 +湫 +捋 +1845 +咩 +裏 +avi +犒 +2050 +墀 +yeah +god +445 +lesson +硐 +蔸 +399 +758 +pu +computer +456 +钽 +1847 +麂 +brown +store +蒡 +鼹 +绻 +1821 +錾 +仃 +515 +篙 +蕤 +589 +applied +737 +930 +c3 +1841 +铤 +billboard +apec +槁 +牖 +螈 +mary +俦 +family +笄 +color +啻 +対 +jsp +郤 +next +iq +645 +506 +hbv +闼 +a3 +349 +value +413 +igg +411 +426 +醺 +赍 +檗 +usa +裾 +head +噫 +掸 +mike +箓 +usb2 +things +5800 +5v +o2 +妪 +乂 +蝈 +砻 +胍 +220v +392 +cba +397 +535 +idc +analysis +25mg +蜱 +ti +2h +聃 +雠 +碚 +椤 +缯 +昴 +890 +缱 +祎 +der +缬 +ex +508 +铙 +cnc +pentium +孀 +533 +advanced +mpa +yl +笳 +蘇 +愆 +685 +榉 +old +氙 +call +alex +燹 +撂 +菽 +583 +箬 +蛄 +瘸 +嬛 +495 +橐 +could +60000 +something +纡 +刽 +辂 +hong +377 +law +蒯 +邨 +1846 +1550 +r2 +1837 +赀 +player +414 +跸 +phone +邙 +hold +rgb +421 +henry +2025 +黟 +409 +磴 +1815 +mode +1843 +闿 
+504 +letters +1780 +428 +垟 +389 +t2 +london +528 +jpeg +嵯 +钚 +steve +跄 +30min +527 +潸 +h2 +35000 +崴 +eric +379 +run +three +rf +left +455 +恁 +open +楮 +556 +bc +476 +腧 +458 +plus +1812 +1839 +胨 +b12 +4d +芫 +america +est +dream +碴 +隰 +杓 +md +ya +global +436 +15mm +2ml +貉 +欹 +sup3 +侑 +ea +鳜 +910 +ben +铄 +椴 +昇 +醍 +1020 +798 +midi +肓 +features +lc +brian +akb48 +缂 +1835 +test +铡 +light +978 +s1 +1799 +key +sim +1795 +simple +energy +蹠 +徂 +west +725 +body +豢 +424 +face +蒽 +lin +805 +1120 +479 +菡 +bill +433 +衲 +阚 +believe +brt +pa +last +芗 +hu +sam +wei +adsl +602 +mk +痍 +玠 +1832 +523 +晷 +604 +jj +468 +淝 +1560 +鄯 +ck +473 +糗 +耨 +榧 +394 +940 +eq +498 +used +sc +胴 +c2 +蕈 +screen +镬 +635 +鼾 +431 +education +wwe +摭 +鸮 +cl +5400 +fpga +恚 +419 +実 +asia +534 +552 +砝 +100mm +pid +741 +珣 +under +603 +寤 +埙 +mbc +tc +xxx +didn +478 +mn +p1 +锏 +simon +ansi +438 +hi +615 +喟 +蘅 +骺 +cell +捭 +study +586 +393 +莜 +should +xi +缶 +f2 +games +0g +1760 +mini +johnson +jones +yes +锟 +1825 +叵 +cm3 +炷 +1580 +stay +675 +another +6800 +鲧 +1736 +ps2 +胼 +517 +査 +岬 +2019 +1640 +rose +鹂 +牯 +珥 +entertainment +448 +und +496 +莼 +software +970 +邠 +5300 +h1n1 +488 +da +眇 +卟 +変 +20m +may +417 +lady +galaxy +4100 +惴 +1789 +846 +801 +渑 +907 +put +蚱 +gone +606 +t3 +company +632 +454 +516 +998 +548 +391 +4700 +瞌 +ide +瘰 +7200 +佝 +together +street +旸 +626 +衽 +郅 +奁 +731 +30mg +mvp +1370 +60cm +12cm +魑 +1828 +628 +everything +612 +san +937 +缛 +2gb +lu +angel +20ml +576 +颙 +sony +790 +press +镫 +hall +簌 +beautiful +豇 +711 +453 +pm +姹 +thing +442 +邋 +alpha +leave +暝 +441 +30mm +chapter +507 +100000 +526 +directx +511 +9cm +words +釐 +619 +洹 +444 +frank +咿 +eyes +483 +俳 +522 +蜊 +醐 +541 +water +499 +聩 +non +bob +坻 +532 +757 +545 +毽 +oo +喾 +alone +scott +744 +辋 +river +zhu +倌 +媪 +蛳 +滹 +哙 +nc +20g +阊 +gs +queen +趸 +1130 +1645 +祢 +4mg +1814 +girls +544 +e1 +籀 +1210 +1573 +徼 +ipv6 +訾 +髁 +1a +jackson +砜 +1836 +les +4gb +撸 +瓘 +1790 +缁 +镓 +sars +eps +519 +sod +bp +1810 +year +縻 +sound +617 +菀 +1125 +598 +酢 +桠 +466 +emc +撵 +怏 +429 +1838 +ready +渌 +546 +taylor +452 +news +1180 +568 +2a +af +538 +list +hot +1380 +etc +1796 +摞 +mo +槲 +levels +ht +浠 +诜 +魉 +韫 +daniel +亓 +盤 +pv +瑭 +魍 +1831 +emi +襞 +social +dreamweaver +爿 +kbs +565 +613 +990 +浃 +樯 +jb +讵 +揩 +physics +耋 +帏 +lng +崃 +bs +457 +enough +shy +521 +596 +ec +451 +鸩 +遢 +turn +臃 +available +4400 +585 +粿 +1010 +禳 +hand +439 +536 +桫 +link +side +earth +mx +髹 +7m +482 +诳 +472 +1140 +707 +622 +wcdma +513 +must +492 +462 +踉 +40mg +948 +cmax +郃 +1320 +v2 +542 +email +493 +嗖 +sup +讧 +cnn +446 +碁 +17000 +湎 +30m +529 +653 +531 +575 +阏 +sr +united +pm2 +mt +媾 +443 +様 +aac +806 +哔 +舸 +vb +611 +曩 +821 +gre +gl +cisco +忝 +峁 +掂 +464 +葳 +487 +437 +including +715 +鄄 +558 +both +谵 +463 +jim +608 +m4 +5100 +彊 +锴 +war +郜 +money +481 +葖 +1824 +tnt +蓇 +瓴 +鳟 +橼 +5s +louis +434 +鲇 +邗 +el +犄 +秭 +3900 +records +view +chemical +1001 +1mol +dance +668 +dl +槭 +缵 +que +624 +rt +1823 +1805 +005 +1826 +巯 +sgs +user +龊 +qc +狍 +island +language +space +擞 +saint +2n +pt +share +瞽 +hotel +christian +557 +栲 +撅 +2b +1801 +447 +1822 +瑀 +smt +hk +1834 +戢 +825 +50ml +朓 +逖 +general +椹 +nm +洺 +cae +484 +艏 +wma +zn +苁 +single +599 +c4 +滘 +777 +铧 +侪 +ocirc +1kg +684 +豳 +skf +12mm +489 +hla +竦 +貔 +ld +being +562 +圄 +van +gm +688 +655 +special +呷 +edition +1s +jiang +131108 +514 +1792 +ncaa +1833 +旄 +遛 +jr +program +656 +467 +ing +901 +755 +509 +芈 +kong +rp +砣 +桷 +audio +icp +happy +龌 +done +疬 +japan +ts +mit +p2 +524 +looking +miss +缟 +582 +洌 +35mm +494 +grand +跏 +those +joseph +ctrl +547 +1040 +686 +蝮 +lp +cod +菰 +sio2 +txt +1770 +1060 +帑 +767 
+north +fcc +怙 +ester +718 +story +edi +634 +1360 +豸 +1660 +lh +雩 +1230 +magic +誊 +549 +臬 +4k +op +1662 +651 +镣 +箇 +616 +title +sciences +25cm +踱 +s2 +t4 +钍 +648 +100m +543 +588 +苫 +554 +蝽 +r1 +3mg +amino +1776 +浯 +609 +772 +ca2 +vlan +469 +500mg +単 +road +亶 +636 +metal +device +40mm +囹 +穑 +1730 +佻 +1818 +绌 +12g +537 +诔 +pve +autodesk +477 +v8 +ray +gp +span +gc +size +716 +鹬 +ssl +crt +1670 +925 +髌 +pn +1127 +702 +658 +services +support +1802 +蒌 +coming +experience +nbc +鳏 +631 +638 +ace +0cm +ems +9001 +殄 +yen +soc +ethyl +怛 +tf +筌 +刳 +studies +theory +1030 +578 +radio +翮 +卍 +畹 +471 +704 +because +1610 +箜 +save +燔 +赳 +553 +1809 +篌 +窨 +翥 +785 +炅 +钕 +lett +803 +1827 +academy +ed +629 +sf +pr +hill +explorer +future +food +莳 +662 +567 +dcs +忖 +戡 +1086 +1190 +1829 +bad +es +15m +order +spring +沢 +south +497 +025 +move +狒 +1630 +圉 +abb +449 +learn +l0 +d2 +5d +wav +琯 +邰 +cis +quality +odm +926 +acta +root +smart +1661 +苾 +cm2 +photos +l2 +via +sk +犸 +623 +邡 +feeling +572 +郏 +襦 +python +bmw +888 +guo +epa +williams +沆 +813 +bot +read +function +wilson +1723 +enterprise +玟 +50hz +s26 +fire +engineer +tony +1819 +濉 +rh +洎 +莨 +氘 +pb +咛 +1720 +佺 +1460 +815 +cbs +腩 +beta +鳔 +1735 +yan +1gb +x2 +剜 +秕 +牝 +芨 +din +関 +del +sms +649 +pal +1369 +far +maya +654 +拊 +812 +595 +竑 +50m +圹 +close +eos +颡 +1420 +6300 +1816 +wrong +break +573 +765 +file +friend +002 +摺 +683 +nx +沩 +蜉 +please +1170 +ro +6400 +筚 +nick +acm +愔 +ati +point +肟 +766 +俶 +fast +ata +d1 +678 +geforce +1710 +yahoo +堃 +绉 +mysql +1793 +奭 +gap +iso14000 +uk +astm +h2o +n2 +film +method +1804 +罅 +so2 +嗳 +665 +adam +uc +蜢 +1806 +1775 +photo +疠 +474 +image +200mm +sure +561 +帔 +髡 +643 +黥 +1813 +proceedings +褛 +柰 +beyond +royal +else +eda +808 +ddr +gif +鏊 +l1 +痼 +571 +waiting +堞 +code +652 +rss +learning +嗝 +461 +beijing +娉 +566 +577 +708 +1520 +689 +kevin +human +661 +539 +875 +1811 +ssci +6600 +戕 +587 +735 +3s +铱 +耜 +觥 +867 +镒 +584 +呓 +1522 +904 +case +1101 +491 +1080p +history +蒹 +栱 +im +564 +f4 +卮 +琚 +salt +jason +rohs +12v +hydroxy +逦 +modem +font +酩 +蓍 +cry +65536 +health +虺 +1798 +tonight +small +谠 +1570 +1220 +jane +against +597 +751 +459 +bd +鼋 +焗 +udp +process +1070 +1807 +children +8g +eb +62mm +22000 +add +1440 +褴 +rm +25g +ccedil +706 +714 +5l +砒 +赧 +蛏 +709 +蚬 +1530 +瘕 +5h +559 +jay +iga +020 +fall +scsi +顗 +isdn +death +563 +today +愠 +dvi +勣 +wait +1642 +飕 +徳 +滢 +琇 +鳙 +db +瞟 +尻 +force +400mg +澶 +荽 +舐 +arts +ha +east +lost +effects +1628 +album +harry +633 +dark +public +2250 +soul +826 +659 +exo +侂 +733 +se +黼 +icu +4h +market +潟 +7800 +绂 +瘗 +ngc +1794 +crazy +蓥 +竽 +濞 +igm +scdma +6200 +cb +835 +699 +骖 +偁 +bmp +809 +1270 +oled +応 +1160 +1621 +锜 +g3 +ova +cheng +614 +匏 +thinkpad +赑 +fps +create +kim +讦 +1480 +诨 +1540 +rev +1v1 +罘 +fans +巖 +1740 +ag +嫘 +1649 +ps3 +908 +颀 +g1 +703 +岿 +v3 +虻 +936 +fl +c2c +罴 +environmental +paris +594 +hear +囗 +jump +communications +溆 +talk +噤 +824 +骝 +003 +咂 +695 +728 +e2 +nec +iptv +1797 +kelly +500ml +锛 +721 +rc +1808 +ldl +1240 +槊 +radeon +676 +啕 +tang +plant +50g +驽 +professional +凇 +698 +s36 +lord +search +alan +籴 +pd +1403 +硖 +1791 +816 +1636 +3h +gsp +811 +sky +1632 +铯 +christmas +怿 +笥 +matter +574 +噙 +倨 +effect +647 +779 +1803 +657 +sorry +awards +igbt +pwm +坭 +醅 +sos +976 +592 +滏 +10min +682 +cs3 +悻 +did +mater +579 +聒 +1724 +feng +low +mhz +836 +722 +枥 +726 +昺 +bank +memory +rap +975 +663 +ips +酆 +2kg +787 +簟 +睇 +轫 +溱 +骢 +榘 +642 +珺 +跹 +677 +series +nlp +raquo +蚶 +stone +1672 +1817 +1646 +827 +驺 +ko +security +perfect +alexander +746 +tt +check +804 +饧 +15mg +sir +moon +doesn +591 +inside 
+tim +672 +641 +噼 +儆 +1w +氚 +646 +哧 +1783 +旒 +鸬 +1648 +夥 +ev +1688 +score +standard +玦 +723 +貅 +揄 +戗 +fx +938 +璩 +fu +1654 +剐 +010 +cpi +垴 +蘼 +hz +1521 +1067 +727 +ah +lv +916 +裒 +639 +han +躅 +1715 +唳 +form +second +嗑 +荦 +674 +霈 +jin +缦 +啭 +pi +1788 +rx +隈 +gao +sdk +zheng +悫 +745 +href +593 +ngo +multi +d3 +彀 +637 +1276 +悭 +found +jis +5700 +焓 +1234 +80cm +磔 +aim +1778 +蓊 +act +569 +xiao +郾 +717 +786 +return +5min +1582 +etf +1590 +action +1625 +sarah +yourself +枧 +鹚 +10kg +80000 +検 +775 +818 +stephen +gui +屃 +644 +9500 +v6 +馑 +wlan +hs +2048 +area +1616 +andrew +8226 +6mg +1567 +1763 +1470 +嗲 +pps +铟 +rca +pierre +687 +null +manager +738 +sdh +828 +薤 +60g +300mg +jun +1685 +favorite +making +playing +summer +754 +692 +涔 +樗 +664 +忾 +収 +绺 +945 +h2s +bis +self +300mm +烊 +opengl +912 +acute +螫 +黩 +996 +magazine +edward +su +elisa +hdl +cyp3a4 +鞫 +foundation +alice +ddr3 +915 +923 +tbs +andy +field +date +transactions +limited +during +1126 +鲠 +1057 +fan +嘭 +缣 +845 +681 +rw +mean +1566 +become +economic +852 +johnny +蒺 +unique +黒 +tu +boys +1330 +885 +getting +cj +1072 +nh +ne +band +cool +724 +771 +骘 +氖 +content +842 +镝 +俅 +谮 +te +9600 +drive +phenyl +1275 +屦 +cao +menu +823 +摁 +氪 +蘧 +active +sb +appl +988 +1622 +伝 +1725 +zero +1008 +3kg +腠 +叡 +hit +鲂 +mi +0kg +748 +lite +enjoy +local +789 +続 +1506 +seen +s3 +1765 +european +讣 +gold +1279 +736 +965 +pl +button +耷 +1430 +986 +763 +toefl +燊 +鸷 +jimmy +dota +955 +861 +猊 +732 +xbox +days +dan +673 +833 +囡 +崤 +4c +economics +23000 +agent +html5 +points +ryan +shi +砬 +湜 +reading +918 +mine +adc +917 +1592 +1781 +翚 +峯 +909 +once +exchange +choose +current +symbian +ts16949 +dave +machine +鲎 +qos +蕖 +1785 +9m +cia +until +cs4 +759 +f3 +903 +24000 +968 +8mg +lewis +鹈 +凼 +snh48 +866 +泫 +荑 +黻 +牂 +1722 +鄣 +篑 +ho +1110 +1784 +髭 +陬 +寔 +dt +shanghai +疴 +邽 +987 +45000 +1042 +喏 +彖 +sl +saas +814 +28000 +a5 +彘 +赟 +819 +foxpro +shit +822 +盹 +诮 +鸫 +per +does +150mm +products +camp +select +capital +茕 +corporation +26000 +铖 +954 +dd +闩 +string +page +ba +671 +読 +782 +鄜 +漈 +盍 +dlp +729 +甭 +愎 +outlook +wii +ue +1787 +festival +communication +channel +gary +1755 +1774 +8600 +copy +150mg +魃 +dragon +1056 +c5 +炆 +track +hdpe +liang +鍊 +1800mhz +1619 +蛐 +995 +21000 +薜 +win +1394 +1786 +rain +楯 +table +鲀 +逡 +itu +applications +mmorpg +嘞 +s7 +696 +侔 +1069 +觇 +lbs +0mg +car +wave +糸 +踮 +狷 +1552 +1627 +latest +step +886 +761 +菘 +783 +寳 +esp +扃 +865 +jazz +k1 +fine +child +kind +anna +60mg +997 +maria +nk +792 +raw +late +soa +905 +cai +ttl +delphi +prince +1340 +禊 +synthesis +喑 +rmb +miller +patrick +933 +running +50kg +1398 +ast +752 +location +dead +塍 +chateau +allows +forget +tg +921 +栝 +5w +kiss +1690 +691 +arthur +瓿 +index +csa +rmvb +msc +廨 +cas +known +h1 +tj +j2ee +asian +841 +1227 +g20 +cross +cos +ntilde +719 +貘 +dnf +california +france +modern +pacific +769 +1066 +turbo +753 +795 +669 +1764 +868 +馕 +僰 +union +1772 +2150 +1063 +哏 +double +fight +858 +math +bo +瑷 +men +sea +6700 +sem +697 +疎 +882 +note +qi +uml +902 +1637 +tp +1290 +1085 +776 +蝣 +怵 +阃 +dps +1687 +弢 +镲 +hcl +al2o3 +js +auto +螅 +1683 +v5 +culture +935 +吖 +edge +碲 +voice +1007 +bridge +855 +008 +夼 +茌 +battle +嗬 +靺 +dp +ae +1090 +895 +1012 +1162 +bi +778 +髀 +1575 +pcm +15min +1598 +铊 +secret +739 +200m +6h +matt +谡 +card +mic +癔 +ecu +16mm +984 +镠 +5km +dhcp +1753 +巻 +秾 +living +gn +1643 +framework +菪 +679 +赜 +1782 +four +铈 +1777 +british +shell +santa +yuan +20ma +fly +927 +qu +nds +qaq +bar +髙 +arp +1667 +1773 +693 +main +鲳 +1510 +1002 +2022 +cdna +box +珰 +100km +004 +畋 +bring +泅 +959 +hpv +makes 
+cmv +鲅 +tmd +1762 +854 +泚 +ghost +short +mcu +1768 +cat +963 +1757 +1206 +1207 +puzzle +793 +central +859 +飏 +walter +60hz +anderson +1727 +thought +屍 +仨 +864 +molecular +856 +dong +financial +1728 +surface +g2 +mf +葚 +叻 +solidworks +res +speed +1195 +咻 +ascii +1404 +784 +jeff +衩 +1371 +land +biology +1655 +郄 +otc +sio +1310 +1605 +蹩 +mems +1618 +m16 +complete +industrial +acs +1603 +kids +tour +u2 +allen +1756 +743 +嬖 +踽 +davis +柽 +鞨 +65279 +7600 +30ml +957 +0l +734 +p450 +956 +ir +麴 +500mm +casio +1038 +roger +library +015 +1652 +薙 +within +hands +874 +ntsc +钇 +whole +jq +氵 +垆 +post +sweet +wall +898 +cs5 +feo +9800 +cms +1390 +since +medical +犟 +1492 +罍 +stand +justin +lake +i5 +1729 +bell +ruby +important +bout +images +lab +962 +1759 +rj +cache +nb +production +経 +807 +1771 +doing +粜 +tnf +ws +guide +bim +events +1626 +1016 +焜 +performance +ra +zl +牀 +1568 +1647 +埝 +洧 +1615 +shift +788 +shen +1588 +60mm +覧 +tuv +1673 +electronic +mos +蓣 +8kg +862 +echo +1572 +section +981 +甯 +sg +1664 +understand +hsk +delta +x86 +eap +block +1578 +er +xl +蒐 +馐 +nox +畑 +ib +trying +ann +1635 +apache +naoh +12345 +缑 +礽 +1624 +694 +瞋 +1601 +浍 +983 +773 +1000m +someone +15kg +25m +847 +袢 +桕 +1037 +jerry +843 +picture +919 +e3 +printf +3gs +marie +853 +rj45 +侩 +913 +896 +lose +unicode +100cm +1711 +charlie +詈 +戸 +1689 +room +烝 +beat +堌 +伋 +hplc +9300 +110kv +nfc +倬 +764 +iis +圯 +solo +碇 +ef +round +chang +1366 +781 +1585 +982 +socket +df +892 +1536 +831 +ren +6kg +4900 +纰 +object +forever +832 +951 +qr +1023 +8800 +4kg +磾 +泔 +1131 +纮 +蓁 +971 +building +1021 +铗 +939 +弇 +挲 +crystal +艉 +smtp +鱬 +cims +fang +1265 +trans +pan +1745 +1604 +泺 +橛 +817 +796 +袴 +cosplay +1154 +1189 +749 +794 +1068 +881 +hc +hope +1410 +couldn +1638 +992 +along +age +250mg +clear +aps +1631 +1011 +provides +1123 +1701 +36000 +csf +韪 +n1 +works +籓 +967 +ptc +贶 +1111 +1651 +棰 +1726 +sar +1666 +qvga +hf +coreldraw +possible +趵 +1629 +943 +marc +luo +樨 +848 +county +944 +tb +dts +junior +vba +lot +傕 +玕 +毎 +direct +839 +繸 +2350 +774 +劵 +fsh +wmv +镧 +秫 +1094 +osi +1602 +邶 +猞 +dior +1766 +1623 +廛 +栌 +钲 +镦 +1607 +psa +spss +xy +1769 +cells +1465 +1577 +gon +send +vision +thinking +imf +嘏 +carl +蝰 +32000 +bay +928 +is09001 +镏 +20kg +淠 +imax +novel +qt +1684 +荇 +逄 +au +author +mod +80mm +1748 +849 +1612 +yet +嘅 +929 +6l +karl +6100 +students +gmat +myself +kate +jpg +979 +1752 +829 +2450 +914 +876 +祕 +瑠 +48h +mpv +1734 +mis +1565 +walk +941 +1075 +1235 +natural +k2 +977 +炝 +杪 +4050 +1669 +p3 +1004 +fn +埴 +1555 +vmware +chloride +942 +steven +1078 +獬 +966 +1135 +country +947 +柢 +捱 +跣 +887 +涑 +75mm +1278 +1583 +western +watch +撃 +伢 +堠 +1045 +12m +museum +1215 +document +marketing +952 +卽 +猁 +usb3 +906 +厣 +physical +辏 +1668 +旆 +agp +茆 +1488 +pg +乜 +deep +1082 +961 +踯 +1526 +# +[ +yam +lofter +##s +##0 +##a +##2 +##1 +##3 +##e +##8 +##5 +##6 +##4 +##9 +##7 +##t +##o +##d +##i +##n +##m +##c +##l +##y +##r +##g +##p +##f +pixnet +cookies +tripadvisor +##er +##k +##h +##b +##x +##u +##w +##ing +ctrip +##on +##v +llc +##an +##z +blogthis +##le +##in +##mm +##00 +ig +##ng +##us +##te +##ed +ncc +blog +##10 +##al +##ic +##ia +##q +##ce +##en +##is +##ra +##es +##j +##cm +tw +##ne +##re +##tion +pony +##2017 +##ch +##or +##na +cafe +pinterest +pixstyleme3c +##ta +##2016 +##ll +##20 +##ie +##ma +##17 +##ion +##th +##st +##se +##et +##ck +##ly +web885 +##ge +xd +##ry +##11 +0fork +##12 +##ter +##ar +##la +##os +##30 +##el +##50 +##ml +tue +posted +##at +##man +##15 +ago +##it +##me +##de +##nt +##mb +##16 +##ve +##da +##ps +##to +https +momo +##son 
+##ke +##80 +ebd +apk +##88 +##um +wiki +brake +mon +po +june +##ss +fb +##as +leonardo +safari +##60 +wed +win7 +kiehl +##co +##go +vfm +kanye +##90 +##2015 +##id +##ey +##sa +##ro +##am +##no +thu +fri +##sh +##ki +comments +##pe +##ine +uber +##mi +##ton +wordpress +##ment +win10 +##ld +##li +gmail +##rs +##ri +##rd +##21 +##io +##99 +paypal +policy +##40 +##ty +##18 +##01 +##ba +taiwan +##ga +privacy +agoda +##13 +##ny +##24 +##22 +##by +##ur +##hz +##ang +cookie +netscape +##ka +##ad +nike +survey +##016 +wikia +##32 +##017 +cbc +##tor +##kg +##rt +##14 +campaign +##ct +##ts +##ns +##ao +##nd +##70 +##ya +##il +##25 +0020 +897 +##23 +hotels +##ian +6606 +##ers +##26 +##day +##ay +##line +##be +talk2yam +yamservice +coco +##dy +##ies +##ha +instagram +##ot +##va +##mo +##land +ltxsw +##ation +##pa +##ol +tag +##ue +##31 +oppo +##ca +##om +chrome +##ure +lol +##19 +##bo +##100 +##way +##ko +##do +##un +##ni +herme +##28 +##up +##06 +##ds +admin +##48 +##015 +##35 +##ee +tpp +##ive +##cc +##ble +##ity +##ex +##ler +##ap +##book +##ice +##km +##mg +##ms +ebay +##29 +ubuntu +##cy +##view +##lo +##oo +##02 +step1 +july +##net +##ls +##ii +##05 +##33 +step2 +ios9 +##box +##ley +samsung +pokemon +##ent +##les +s8 +atom +##said +##55 +##2014 +##66 +adidas +amazon +##ber +##ner +visa +##77 +##der +connectivity +##hi +firefox +skip +##27 +##ir +##61 +##ai +##ver +cafe2017 +##ron +##ster +##sk +##ft +longchamp +ssd +##ti +reply +##my +apr +##ker +source +##one +##2013 +##ow +goods +##lin +##ip +##ics +##45 +##03 +##ff +##47 +ganji +##nce +##per +faq +comment +##ock +##bs +##ah +##lv +##mp +##000 +melody +17life +##au +##71 +##04 +##95 +##age +tips +##68 +##ting +##ung +wonderland +##ction +mar +article +##db +##07 +##ore +##op +##78 +##38 +##ong +##73 +##08 +##ica +##36 +##wa +##64 +homemesh +##85 +##tv +##di +macbook +##ier +##si +##75 +##ok +goris +lock +##ut +carol +##vi +##ac +anti +jan +tags +##98 +##51 +august +##86 +##fs +##sion +jordan +##tt +##lt +##42 +##bc +vivi +##rry +##ted +##rn +usd +##t00 +##58 +##09 +##34 +goo +##ui +##ary +item +##pm +##41 +##za +##2012 +blogabstract +##ger +##62 +##44 +gr2 +asus +cindy +##hd +esc +##od +booking +##53 +fed +##81 +##ina +chan +distribution +steam +pk10 +##ix +##65 +##91 +dec +##ana +icecat +00z +##46 +##ji +##ard +oct +##ain +jp +##ze +##bi +cio +##56 +h5 +##39 +##port +curve +##nm +##dia +utc +12345678910 +##52 +chanel +##and +##im +##63 +vera +vivo +##ei +2756 +##69 +msci +##po +##89 +##bit +##out +##zz +##97 +##67 +opec +##96 +##tes +##ast +##ling +##ory +##ical +kitty +##43 +step3 +##cn +win8 +iphone7 +beauty +##87 +dollars +##ys +##oc +pay +##2011 +##lly +##ks +download +sep +##board +##37 +##lan +winrar +##que +##ua +##com +ettoday +##54 +##ren +##via +##72 +##79 +##tch +##49 +##ial +##nn +step4 +2765 +gov +##xx +mandy +##ser +copyright +fashion +##ist +##art +##lm +##ek +##ning +##if +##ite +iot +##84 +##2010 +##ku +october +##ux +trump +##hs +##ide +##ins +april +##ight +##83 +protected +##fe +##ho +ofo +gomaji +march +##lla +##pp +##ec +6s +720p +##rm +##ham +##92 +fandom +##ell +info +##82 +sina +4066 +##able +##ctor +rights +jul +##76 +mall +##59 +donald +sodu +##light +reserved +htm +##han +##57 +##ise +##tions +##shi +doc +055 +##ram +shopping +aug +##pi +##well +wam +##hu +##gb +##93 +mix +##ef +##uan +bwl +##plus +##res +##ess +tea +hktvmall +##ate +##ese +feb +inn +nov +##ci +pass +##bet +##nk +coffee +airbnb +##ute +woshipm +skype +##fc +##www +##94 +##ght +##gs +##ile +##wood +##uo +icon +##em +says +##king +##tive +blogger 
+##74 +##ox +##zy +##red +##ium +##lf +nokia +claire +##ding +november +lohas +##500 +##tic +##cs +##che +##ire +##gy +##ult +january +ptt +##fa +##mer +pchome +udn +##time +##tte +garden +eleven +309b +bat +##123 +##tra +kindle +##ern +xperia +ces +travel +##ous +##int +edu +cho +##car +##our +##ant +rends +##jo +mastercard +##2000 +kb +##min +##ino +##ris +##ud +##set +##her +##ou +taipei +##fi +##ill +aphojoy +december +meiki +##ick +tweet +##av +iphone6 +##dd +views +##mark +##ash +##ome +koreanmall +##ak +q2 +##200 +mlb +##lle +##watch +##und +##tal +##less +4399 +##rl +update +shop +##mhz +##house +##key +##001 +##hy +##web +##2009 +##gg +##wan +##val +2021 +##ons +doi +trivago +overdope +##ance +573032185 +wx17house +##so +audi +##he +##rp +##ake +beach +cfa +ps4 +##800 +##link +##hp +ferragamo +##eng +##style +##gi +i7 +##ray +##max +##pc +september +##ace +vps +february +pantos +wp +lisa +jquery +offer +##berg +##news +fks +##all +##rus +##888 +##works +blogtitle +loftpermalink +ling +##ja +outlet +##ea +##top +##ness +salvatore +##lu +swift +##ul +week +##ean +##300 +##gle +##back +powered +##tan +##nes +canon +##zi +##las +##oe +##sd +##bot +##world +##zo +top100 +pmi +##vr +ball +vogue +ofweek +##list +##ort +##lon +##tc +##of +##bus +##gen +nas +##lie +##ria +##coin +##bt +nata +vive +cup +##ook +##sy +msg +3ce +##word +ebooks +r8 +nice +months +rewards +##ther +0800 +##xi +##sc +gg +blogfp +daily +##bb +##tar +##ky +anthony +##yo +##ara +##aa +##rc +##tz +##ston +gear +##eo +##ade +##win +##ura +##den +##ita +##sm +png +rakuten +whatsapp +##use +pad +gucci +##ode +##fo +chicago +##hone +io +sogo +be2 +##ology +cloud +##con +##ford +##joy +##kb +##rade +##ach +docker +##ful +##ase +ford +##star +edited +##are +##mc +siri +##ella +bloomberg +##read +pizza +##ison +##vm +node +18k +##play +##cer +##yu +##ings +asr +##lia +step5 +##cd +pixstyleme +##600 +##tus +tokyo +##rial +##life +##ae +tcs +##rk +##wang +##sp +##ving +premium +netflix +##lton +##ple +##cal +021 +##sen +##ville +nexus +##ius +##mah +tila +##tin +resort +##ws +p10 +report +##360 +##ru +bus +vans +##est +links +rebecca +##dm +azure +##365 +##mon +moto +##eam +blogspot +##ments +##ik +##kw +##bin +##ata +##vin +##tu +##ula +station +##ature +files +zara +hdr +top10 +s6 +marriott +avira +tab +##ran +##home +oculus +##ral +rosie +##force +##ini +ice +##bert +##nder +##mber +plurk +##sis +00kg +##ence +##nc +##name +log +ikea +malaysia +##ncy +##nie +##ye +##oid +##chi +xuehai +##1000 +##orm +##rf +##ware +##pro +##era +##ub +##2008 +8891 +scp +##zen +qvod +jcb +##hr +weibo +##row +##ish +github +mate +##lot +##ane +##tina +ed2k +##vel +##900 +final +ns +bytes +##ene +##cker +##2007 +##px +topapp +helpapp +14k +g4g +ldquo +##fork +##gan +##zon +##qq +##google +##ism +##zer +toyota +category +##labels +restaurant +##md +posts +##ico +angelababy +123456 +sports +candy +##new +##here +swissinfo +dram +##ual +##vice +##wer +sport +q1 +ios10 +##mll +wan +##uk +x3 +0t +##ming +e5 +##3d +h7n9 +worldcat +##vo +##led +##580 +##ax +##ert +polo +##lr +##hing +##chat +##ule +hotmail +##pad +bbq +##ring +wali +2k +costco +switch +##city +philips +##mann +panasonic +##cl +##vd +##ping +##rge +##lk +css3 +##ney +##ular +##400 +##tter +lz +##tm +##yan +##let +coach +##pt +a8 +follow +##berry +##ew +##wn +##og +##code +##rid +villa +git +r11 +##cket +error +##anonymoussaid +##ag +##ame +##gc +qa +##lis +##gin +vmalife +##cher +wedding +##tis +demo +bye +##rant +orz +acer +##ats +##ven +macd +yougou +##dn +##ano +##urt +##rent 
+continue +script +##wen +##ect +paper +##chel +##cat +x5 +fox +##blog +loading +##yn +##tp +kuso +799 +vdc +forest +prime +ultra +##rmb +square +##field +##reen +##ors +##ju +##air +##map +cdn +##wo +m8 +##get +opera +##base +##ood +vsa +##aw +##ail +count +##een +##gp +vsc +tree +##eg +##ose +##ories +##shop +alphago +v4 +fluke62max +zip +##sta +bas +##yer +hadoop +##ube +##wi +0755 +hola +##low +centre +##fer +##750 +##media +##san +##bank +q3 +##nge +##mail +##lp +client +event +vincent +##nse +sui +adchoice +##stry +##zone +ga +apps +##ab +##rner +kymco +##care +##pu +##yi +minkoff +annie +collection +kpi +playstation +bh +##bar +armani +##xy +iherb +##ery +##share +##ob +volvo +##ball +##hk +##cp +##rie +##ona +##sl +gtx +rdquo +jayz +##lex +##rum +namespace +##ale +##atic +##erson +##ql +##ves +##type +enter +##168 +##mix +##bian +a9 +ky +##lc +movie +##hc +tower +##ration +##mit +##nch +ua +tel +prefix +##o2 +##point +ott +##http +##ury +baidu +##ink +member +##logy +bigbang +nownews +##js +##shot +##tb +eba +##tics +##lus +spark +##ama +##ions +##lls +##down +##ress +burberry +day2 +##kv +related +edit +##ark +cx +32gb +g9 +##ans +##tty +s5 +##bee +thread +xr +buy +spotify +##ari +##verse +7headlines +nego +sunny +dom +positioning +fit +##tton +alexa +##ties +##llow +amy +##du +##rth +##lar +2345 +##des +sidebar +site +##cky +##kit +##ime +##009 +season +##fun +gogoro +a7 +lily +twd600 +##vis +##cture +friday +yi +##tta +##tel +##lock +economy +tinker +8gb +##app +oops +##right +edm +##cent +supreme +##its +##asia +dropbox +##tti +books +##tle +##ller +##ken +##more +##boy +sex +##dom +##ider +##unch +##put +##gh +ka +amoled +div +##tr +##n1 +port +howard +##tags +ken +##nus +adsense +buff +thunder +##town +##ique +##body +pin +##erry +tee +##the +##013 +udnbkk +16gb +##mic +miui +##tro +##alk +##nity +s4 +##oa +docomo +##tf +##ack +fc2 +##ded +##sco +##014 +##rite +linkedin +##ada +##now +##ndy +ucbug +sputniknews +legalminer +##ika +##xp +##bu +q10 +##rman +cheese +ming +maker +##gm +nikon +##fig +ppi +jchere +ted +fgo +tech +##tto +##gl +##len +hair +img +##pper +##a1 +acca +##ition +##ference +suite +##ig +##mond +##cation +##pr +101vip +##999 +64gb +airport +##over +##ith +##su +town +piece +##llo +no1 +##qi +focus +reader +##admin +##ora +false +##log +##ces +##ume +motel +##oper +flickr +netcomponents +##af +pose +##ound +##cg +##site +##iko +con +##ath +##hip +##rey +cream +##cks +012 +##dp +facebooktwitterpinterestgoogle +sso +shtml +swiss +##mw +lumia +xdd +tiffany +insee +russell +dell +##ations +camera +##vs +##flow +##late +classic +##nter +##ever +##lab +##nger +qe +##cing +editor +##nap +sunday +##ens +##700 +##bra +acg +sofascore +mkv +##ign +jonathan +build +labels +##oto +tesla +moba +gohappy +ajax +##test +##urs +wps +fedora +##ich +mozilla +##480 +##dr +urn +##lina +grace +##die +##try +##ader +elle +##chen +price +##ten +uhz +##ough +##hen +states +push +session +balance +wow +##cus +##py +##ward +##ep +34e +wong +prada +##cle +##ree +q4 +##ctive +##ool +##ira +##163 +rq +buffet +e6 +##ez +##card +##cha +day3 +eye +##end +adi +tvbs +##ala +nova +##tail +##ries +##ved +base +##ways +hero +hgih +profile +fish +mu +ssh +##wd +click +cake +##ond +pre +##tom +kic +pixel +##ov +##fl +product +6a +##pd +dear +##gate +yumi +##sky +bin +##ture +##ape +isis +nand +##101 +##load +##ream +a6 +##post +##we +zenfone +##ike +gd +forum +jessica +##ould +##ious +lohasthree +##gar +##ggle +##ric +##own +eclipse +##side +061 +##other +##tech +##ator +engine +##ged +plaza +##fit 
+westbrook +reuters +##ily +contextlink +##hn +##cil +##cel +cambridge +##ize +##aid +##data +frm +##head +butler +##sun +##mar +puma +pmid +kitchen +##lic +day1 +##text +##page +##rris +pm1 +##ket +trackback +##hai +display +##hl +idea +##sent +airmail +##ug +##men +028 +##lution +schemas +asics +wikipedia +##tional +##vy +##dget +##ein +contact +pepper +##uel +##ument +##hang +q5 +##sue +##ndi +swatch +##cept +popular +##ste +##tag +trc +##west +##live +honda +ping +messenger +##rap +v9 +unity +appqq +leo +##tone +##ass +uniqlo +##010 +moneydj +##tical +12306 +##m2 +coc +miacare +##mn +tmt +##core +vim +kk +##may +target +##2c +##ope +omega +pinkoi +##rain +##ement +p9 +rd +##tier +##vic +zone +isofix +cpa +kimi +##lay +lulu +##uck +050 +weeks +##hop +##ear +eia +##fly +korea +boost +##ship +eur +valley +##iel +##ude +rn +##ena +feed +5757 +qqmei +##thing +aws +pink +##ters +##kin +board +##vertisement +wine +##ien +##dge +##tant +##twitter +##3c +cool1 +##012 +##150 +##fu +##iner +googlemsn +pixnetfacebookyahoo +x7 +##uce +sao +##ev +##file +9678 +xddd +shirt +##rio +##hat +givenchy +bang +##lio +monday +##abc +ubuntuforumwikilinuxpastechat +##vc +##rity +7866 +##ost +imsean +tiger +##fet +dji +##come +##beth +##aft +##don +3p +emma +##khz +x6 +##face +pptv +x4 +##mate +sophie +##jing +fifa +##mand +sale +inwedding +##gn +##mmy +##pmlast +nana +##wu +note7 +##340 +##bel +window +##dio +##ht +##ivity +domain +neo +##isa +##lter +5k +f5 +##cts +ft +zol +##act +mwc +nbapop +eds +##room +previous +tomtom +##ets +5t +chi +##hg +fairmont +gay +1b +##raph +##ils +i3 +avenue +##host +##bon +##tsu +message +navigation +fintech +h6 +##ject +##vas +##firm +credit +##wf +xxxx +##nor +##space +huawei +plan +json +sbl +##dc +wish +##120 +##sol +windows7 +washington +##nsis +lo +##sio +##ym +##bor +planet +##wt +gpa +##tw +##oka +connect +##rss +##work +##atus +chicken +##times +fa +##ather +##cord +009 +##eep +hitachi +##pan +disney +##press +wind +frigidaire +##tl +hsu +##ull +expedia +archives +##wei +cut +ins +6gb +brand +cf1 +##rip +##nis +128gb +3t +##oon +quick +15058 +wing +##bug +##cms +##dar +##oh +zoom +trip +##nba +rcep +aspx +080 +gnu +##count +##url +##ging +8591 +am09 +shadow +##cia +emily +##tation +host +ff +techorz +##mini +##mporary +##ering +##next +cma +##mbps +##gas +##ift +##dot +amana +##ros +##eet +##ible +##aka +##lor +maggie +##011 +##iu +##gt +1tb +articles +##burg +##iki +database +fantasy +##rex +##cam +dlc +dean +##you +path +gaming +victoria +maps +##lee +##itor +overchicstoretvhome +##xt +##nan +x9 +install +##ann +##ph +##rcle +##nic +##nar +metro +chocolate +##rian +##table +skin +##sn +mountain +##0mm +inparadise +7x24 +##jia +eeworld +creative +g5 +parker +ecfa +village +sylvia +hbl +##ques +##onsored +##x2 +##v4 +##tein +ie6 +##stack +ver +##ads +##baby +bbe +##110 +##lone +##uid +ads +022 +gundam +006 +scrum +match +##ave +##470 +##oy +##talk +glass +lamigo +##eme +##a5 +wade +kde +##lace +ocean +tvg +##covery +##r3 +##ners +##rea +##aine +cover +##ision +##sia +##bow +msi +##love +soft +z2 +##pl +mobil +##uy +nginx +##oi +##rr +6221 +##mple +##sson +##nts +91tv +comhd +crv3000 +##uard +gallery +##bia +rate +spf +redis +traction +icloud +011 +jose +##tory +sohu +899 +kicstart2 +##hia +##sit +##walk +##xure +500g +##pact +xa +carlo +##250 +##walker +##can +cto +gigi +pen +##hoo +ob +##yy +13913459 +##iti +mango +##bbs +sense +oxford +walker +jennifer +##ola +course +##bre +##pus +##rder +lucky +075 +ivy +##nia +sotheby +##ugh +joy +##orage +##ush +##bat +##dt +r9 
+##2d +##gio +wear +##lax +##moon +seven +lonzo +8k +evolution +##kk +kd +arduino +##lux +arpg +##rdon +cook +##x5 +five +##als +##ida +sign +##nda +##posted +fresh +##mine +##skip +##form +##ssion +##tee +dyson +stage +##jie +##night +epson +pack +##ppy +wd +##eh +##rence +##lvin +golden +discovery +##trix +##n2 +loft +##uch +##dra +##sse +1mdb +welcome +##urn +gaga +##lmer +teddy +##160 +##f2016 +##sha +rar +holiday +074 +##vg +##nos +##rail +gartner +gi +6p +##dium +kit +b3 +eco +sean +##stone +nu +##np +f16 +write +029 +m5 +##ias +##dk +fsm +52kb +##xxx +##cake +lim +ru +1v +##ification +published +angela +16g +analytics +##nel +gmt +##icon +##bby +ios11 +waze +9985 +##ust +##007 +delete +52sykb +wwdc +027 +##fw +1389 +##xon +brandt +##ses +##dragon +vetements +anne +monte +official +##ere +##nne +##oud +etnews +##a2 +##graphy +##rtex +##gma +mount +archive +morning +tan +ddos +e7 +day4 +factory +bruce +##ito +guest +##lling +n3 +mega +women +dac +church +##jun +singapore +##facebook +6991 +starbucks +##tos +##stin +##shine +zen +##mu +tina +request +##gence +q7 +##zzi +diary +##tore +##ead +cst +##osa +canada +va +##jiang +##lam +##nix +##sday +g6 +##master +bing +##zl +nb40 +thai +ln284ct +##itz +##2f +bonnie +##food +##lent +originals +##stro +##lts +##bscribe +ntd +yesstyle +hmv +##tment +d5 +##pn +topios9 +lifestyle +virtual +##ague +xz +##deo +muji +024 +unt +##nnis +faq1 +##ette +curry +##pop +release +##cast +073 +##ews +5c +##stle +ios7 +##ima +dog +lenovo +##r4 +013 +vornado +##desk +##ald +9595 +##van +oil +common +##jy +##lines +g7 +twice +ella +nano +belle +##mes +##self +##note +benz +##ova +##wing +kai +##hua +##rect +rainer +##unge +##0m +guestname +##uma +##kins +##zu +tokichoi +##price +##med +##mus +rmk +address +vm +openload +##group +##hin +##iginal +amg +urban +##oz +jobs +##public +##sch +##dden +##bell +hostel +##drive +##rmin +boot +##370 +##fx +##nome +##ctionary +##oman +##lish +##cr +##hm +##how +francis +c919 +b5 +evernote +##uc +##3000 +coupe +##urg +##cca +##uality +019 +##ett +##ani +##tax +##rma +leonnhurt +##jin +ict +bird +notes +##dical +##lli +result +iu +ee +smap +gopro +##last +yin +pure +32g +##dan +##rame +mama +##oot +bean +##hur +2l +bella +sync +xuite +##ground +discuz +##getrelax +##ince +##bay +##5s +apt +##pass +jing +##rix +rich +niusnews +##ello +bag +##eting +##mobile +##ience +details +universal +silver +dit +private +ddd +u11 +kanshu +##ified +fung +##nny +dx +##520 +tai +023 +##fr +##lean +##pin +##rin +ly +rick +##bility +banner +##baru +##gion +vdf +qualcomm +bear +oldid +ian +jo +##tors +population +##ernel +##mv +##bike +ww +##ager +exhibition +##del +##pods +fpx +structure +##free +##tings +kl +##rley +##copyright +##mma +orange +yoga +4l +canmake +honey +##anda +nikkie +dhl +publishing +##mall +##gnet +e88 +##dog +fishbase +### +##[ +。 +! +? +! +? +; +: +; +##, +##的 +##、 +##一 +##人 +##有 +##是 +##在 +##中 +##为 +##和 +##了 +##不 +##年 +##学 +##大 +##国 +##生 +##以 +##“ +##” +##作 +##业 +##个 +##上 +##用 +##, +##地 +##会 +##成 +##发 +##工 +##时 +##于 +##理 +##出 +##行 +##要 +##. 
+##等 +##他 +##到 +##之 +##这 +##可 +##后 +##家 +##对 +##能 +##公 +##与 +##》 +##《 +##主 +##方 +##分 +##经 +##来 +##全 +##其 +##部 +##多 +##产 +##自 +##文 +##高 +##动 +##进 +##法 +##化 +##: +##我 +##面 +##) +##( +##实 +##教 +##建 +##体 +##而 +##长 +##子 +##下 +##现 +##开 +##本 +##力 +##定 +##性 +##过 +##设 +##合 +##小 +##同 +##机 +##市 +##品 +##水 +##新 +##内 +##事 +##也 +##种 +##及 +##制 +##入 +##所 +##心 +##务 +##就 +##管 +##们 +##得 +##展 +##重 +##民 +##加 +##区 +##物 +##者 +##通 +##天 +##政 +##三 +##电 +##关 +##度 +##第 +##名 +##术 +##最 +##系 +##月 +##外 +##资 +##日 +##代 +##员 +##如 +##间 +##位 +##并 +##书 +##科 +##村 +##应 +##量 +##道 +##前 +##当 +##无 +##里 +##相 +##平 +##从 +##计 +##提 +##保 +##任 +##程 +##技 +##都 +##研 +##十 +##基 +##特 +##好 +##被 +##或 +##目 +##将 +##使 +##山 +##二 +##说 +##数 +##点 +##明 +##情 +##元 +##着 +##收 +##组 +##然 +##美 +##各 +##由 +##场 +##金 +##形 +##农 +##期 +##因 +##表 +##此 +##色 +##起 +##还 +##立 +##世 +##安 +##活 +##专 +##质 +##规 +##社 +##万 +##信 +##西 +##统 +##结 +##路 +##利 +##次 +##南 +##式 +##意 +##级 +##常 +##师 +##校 +##你 +##育 +##果 +##究 +##司 +##服 +##门 +##海 +##导 +##流 +##项 +##她 +##总 +##处 +##两 +##传 +##东 +##正 +##省 +##院 +##户 +##手 +##具 +##原 +##强 +##北 +##向 +##先 +##但 +##米 +##城 +##企 +##件 +##风 +##军 +##身 +##更 +##知 +##已 +##气 +##战 +##至 +##单 +##口 +##集 +##创 +##解 +##四 +##标 +##交 +##比 +##商 +##论 +##界 +##题 +##变 +##花 +##改 +##类 +##运 +##指 +##型 +##调 +##女 +##神 +##接 +##造 +##受 +##广 +##只 +##委 +##去 +##共 +##治 +##达 +##持 +##条 +##网 +##头 +##构 +##县 +##些 +##该 +##又 +##那 +##想 +##样 +##办 +##济 +##格 +##责 +##车 +##很 +##施 +##求 +##己 +##光 +##精 +##林 +##完 +##爱 +##线 +##参 +##少 +##积 +##清 +##看 +##优 +##报 +##王 +##直 +##没 +##每 +##据 +##游 +##效 +##感 +##五 +##影 +##别 +##获 +##领 +##称 +##选 +##供 +##乐 +##老 +##么 +##台 +##问 +##划 +##带 +##器 +##源 +##织 +##放 +##深 +##备 +##视 +##白 +##功 +##取 +##装 +##营 +##见 +##记 +##环 +##队 +##节 +##准 +##石 +##它 +##回 +##历 +##负 +##真 +##增 +##医 +##联 +##做 +##职 +##容 +##士 +##包 +##义 +##观 +##团 +##病 +##府 +##息 +##则 +##考 +##料 +##华 +##州 +##语 +##证 +##整 +##让 +##江 +##史 +##空 +##验 +##需 +##支 +##命 +##给 +##离 +##认 +##艺 +##较 +##土 +##古 +##养 +##才 +##境 +##推 +##把 +##均 +##图 +##际 +##斯 +##近 +##片 +##局 +##修 +##字 +##德 +##权 +##步 +##始 +##复 +##转 +##协 +##即 +##打 +##画 +##投 +##决 +##何 +##约 +##反 +##费 +##议 +##护 +##极 +##河 +##房 +##查 +##布 +##思 +##干 +##价 +##儿 +##非 +##马 +##党 +##奖 +##模 +##故 +##编 +##音 +##范 +##识 +##率 +##存 +##引 +##客 +##属 +##评 +##采 +##尔 +##配 +##镇 +##室 +##再 +##案 +##监 +##习 +##注 +##根 +##克 +##演 +##食 +##族 +##示 +##球 +##状 +##青 +##号 +##张 +##百 +##素 +##首 +##易 +##热 +##阳 +##今 +##园 +##防 +##版 +##太 +##乡 +##英 +##材 +##列 +##便 +##写 +##住 +##置 +##层 +##助 +##确 +##试 +##难 +##承 +##象 +##居 +##黄 +##快 +##断 +##维 +##却 +##红 +##速 +##连 +##众 +##细 +##态 +##话 +##周 +##言 +##药 +##培 +##血 +##亩 +##龙 +##越 +##值 +##几 +##边 +##读 +##未 +##曾 +##测 +##算 +##京 +##景 +##余 +##站 +##低 +##温 +##消 +##必 +##切 +##依 +##随 +##且 +##志 +##卫 +##域 +##照 +##许 +##限 +##著 +##销 +##落 +##足 +##适 +##争 +##策 +##控 +##武 +##按 +##初 +##角 +##核 +##死 +##检 +##富 +##满 +##显 +##审 +##除 +##致 +##亲 +##占 +##失 +##星 +##章 +##善 +##续 +##千 +##叶 +##火 +##副 +##告 +##段 +##什 +##声 +##终 +##况 +##走 +##木 +##益 +##戏 +##独 +##纪 +##植 +##财 +##群 +##六 +##赛 +##远 +##拉 +##亚 +##密 +##排 +##超 +##像 +##课 +##围 +##往 +##响 +##击 +##疗 +##念 +##八 +##云 +##险 +##律 +##请 +##革 +##诗 +##批 +##底 +##压 +##双 +##男 +##训 +##例 +##汉 +##升 +##拥 +##势 +##酒 +##眼 +##官 +##牌 +##油 +##曲 +##友 +##望 +##黑 +##歌 +##筑 +##础 +##香 +##仅 +##担 +##括 +##湖 +##严 +##秀 +##剧 +##九 +##举 +##执 +##充 +##兴 +##督 +##博 +##草 +##般 +##李 +##健 +##喜 +##授 +##普 +##预 +##灵 +##突 +##良 +##款 +##罗 +##微 +##七 +##录 +##朝 +##飞 +##宝 +##令 +##轻 +##劳 +##距 +##异 +##简 +##兵 +##树 +##序 +##候 +##含 +##福 +##尽 +##留 +##丰 +##旅 +##征 +##临 +##破 +##移 +##篇 +##抗 +##典 +##端 +##苏 +##奇 +##止 +##康 +##店 +##毛 +##觉 +##春 +##售 +##络 +##降 +##板 +##坚 +##母 +##讲 +##早 +##印 +##略 +##孩 +##夫 +##藏 +##铁 +##害 +##互 +##帝 +##田 +##融 +##皮 +##宗 +##岁 +##载 +##析 +##斗 +##须 
+##伤 +##介 +##另 +##半 +##班 +##馆 +##味 +##楼 +##卡 +##射 +##述 +##杀 +##波 +##绿 +##免 +##兰 +##绝 +##刻 +##短 +##察 +##输 +##择 +##综 +##杂 +##份 +##纳 +##父 +##词 +##银 +##送 +##座 +##左 +##继 +##固 +##宣 +##厂 +##肉 +##换 +##补 +##税 +##派 +##套 +##欢 +##播 +##吸 +##圆 +##攻 +##阿 +##购 +##听 +##右 +##减 +##激 +##巴 +##背 +##够 +##遇 +##智 +##玉 +##找 +##宽 +##陈 +##练 +##追 +##毕 +##彩 +##软 +##帮 +##股 +##荣 +##托 +##予 +##佛 +##堂 +##障 +##皇 +##若 +##守 +##似 +##届 +##待 +##货 +##散 +##额 +##尚 +##穿 +##丽 +##骨 +##享 +##差 +##针 +##索 +##稳 +##宁 +##贵 +##酸 +##液 +##唐 +##操 +##探 +##玩 +##促 +##笔 +##库 +##救 +##虽 +##久 +##闻 +##顶 +##床 +##港 +##鱼 +##亿 +##登 +##永 +##毒 +##桥 +##冷 +##魔 +##秘 +##陆 +##您 +##童 +##归 +##侧 +##沙 +##染 +##封 +##紧 +##松 +##川 +##刘 +##雄 +##希 +##毫 +##卷 +##某 +##季 +##菜 +##庭 +##附 +##逐 +##夜 +##宫 +##洲 +##退 +##顾 +##尼 +##胜 +##剂 +##纯 +##舞 +##遗 +##苦 +##梦 +##挥 +##航 +##愿 +##街 +##招 +##矿 +##夏 +##盖 +##献 +##怎 +##茶 +##申 +##吧 +##脑 +##亦 +##吃 +##频 +##宋 +##央 +##威 +##厚 +##块 +##冲 +##叫 +##熟 +##礼 +##厅 +##否 +##渐 +##笑 +##钱 +##钟 +##甚 +##牛 +##丝 +##靠 +##岛 +##绍 +##盘 +##缘 +##聚 +##静 +##雨 +##氏 +##圣 +##顺 +##唱 +##刊 +##阶 +##困 +##急 +##饰 +##弹 +##庄 +##既 +##野 +##阴 +##混 +##饮 +##损 +##齐 +##末 +##错 +##轮 +##宜 +##鲜 +##兼 +##敌 +##粉 +##祖 +##延 +##钢 +##辑 +##欧 +##硬 +##甲 +##诉 +##册 +##痛 +##订 +##缺 +##晚 +##衣 +##佳 +##脉 +##盛 +##乎 +##拟 +##贸 +##扩 +##船 +##仪 +##谁 +##警 +##停 +##席 +##竞 +##释 +##庆 +##汽 +##仍 +##掌 +##诸 +##仙 +##弟 +##吉 +##洋 +##奥 +##票 +##危 +##架 +##买 +##径 +##塔 +##休 +##付 +##恶 +##雷 +##怀 +##秋 +##借 +##巨 +##透 +##誉 +##厘 +##句 +##跟 +##胞 +##婚 +##幼 +##烈 +##峰 +##寻 +##君 +##汇 +##趣 +##纸 +##假 +##肥 +##患 +##杨 +##雅 +##罪 +##谓 +##亮 +##脱 +##寺 +##烟 +##判 +##绩 +##乱 +##刚 +##摄 +##洞 +##践 +##码 +##启 +##励 +##呈 +##曰 +##呢 +##符 +##哥 +##媒 +##疾 +##坐 +##雪 +##孔 +##倒 +##旧 +##菌 +##岩 +##鼓 +##亡 +##访 +##症 +##暗 +##湾 +##幸 +##池 +##讨 +##努 +##露 +##吗 +##繁 +##途 +##殖 +##败 +##蛋 +##握 +##刺 +##耕 +##洗 +##沉 +##概 +##哈 +##泛 +##凡 +##残 +##隐 +##虫 +##朋 +##虚 +##餐 +##殊 +##慢 +##询 +##蒙 +##孙 +##谈 +##鲁 +##裂 +##贴 +##污 +##漫 +##谷 +##违 +##泉 +##拿 +##森 +##横 +##扬 +##键 +##膜 +##迁 +##尤 +##涉 +##净 +##诚 +##折 +##冰 +##械 +##拍 +##梁 +##沿 +##避 +##吴 +##惊 +##犯 +##灭 +##湿 +##迷 +##姓 +##阅 +##灯 +##妇 +##触 +##冠 +##答 +##俗 +##档 +##尊 +##谢 +##措 +##筹 +##竟 +##韩 +##签 +##剑 +##鉴 +##灾 +##贯 +##迹 +##洛 +##沟 +##束 +##翻 +##巧 +##坏 +##弱 +##零 +##壁 +##枝 +##映 +##恩 +##抓 +##屋 +##呼 +##脚 +##绘 +##淡 +##辖 +##伊 +##粒 +##欲 +##震 +##伯 +##私 +##蓝 +##甘 +##储 +##胡 +##卖 +##梅 +##耳 +##疑 +##润 +##伴 +##泽 +##牧 +##烧 +##尾 +##累 +##糖 +##怪 +##唯 +##莫 +##粮 +##柱 +##竹 +##灰 +##岸 +##缩 +##井 +##伦 +##柔 +##盟 +##珠 +##丹 +##皆 +##哪 +##迎 +##颜 +##衡 +##啊 +##塑 +##寒 +##紫 +##镜 +##氧 +##误 +##伍 +##彻 +##刀 +##览 +##炎 +##津 +##耐 +##秦 +##尖 +##潮 +##描 +##浓 +##召 +##禁 +##阻 +##胶 +##译 +##腹 +##泰 +##乃 +##盐 +##潜 +##鸡 +##诺 +##遍 +##纹 +##冬 +##牙 +##麻 +##辅 +##猪 +##弃 +##楚 +##羊 +##晋 +##鸟 +##赵 +##洁 +##谋 +##隆 +##滑 +##籍 +##臣 +##朱 +##泥 +##墨 +##辆 +##墙 +##浪 +##姐 +##赏 +##纵 +##拔 +##倍 +##纷 +##摩 +##壮 +##苗 +##偏 +##塞 +##贡 +##仁 +##宇 +##卵 +##瓦 +##枪 +##覆 +##殿 +##刑 +##贫 +##妈 +##幅 +##幕 +##忆 +##丁 +##估 +##废 +##萨 +##舍 +##详 +##旗 +##岗 +##洪 +##贝 +##迅 +##凭 +##勇 +##雕 +##奏 +##旋 +##杰 +##煤 +##阵 +##乘 +##溪 +##奉 +##畜 +##挑 +##昌 +##硕 +##庙 +##惠 +##薄 +##逃 +##爆 +##哲 +##浙 +##珍 +##炼 +##栏 +##暴 +##币 +##隔 +##吨 +##倾 +##嘉 +##址 +##陶 +##绕 +##诊 +##遭 +##桃 +##魂 +##兽 +##豆 +##闲 +##箱 +##拓 +##燃 +##裁 +##晶 +##掉 +##脂 +##溶 +##顿 +##肤 +##虑 +##鬼 +##灌 +##徐 +##龄 +##陵 +##恋 +##侵 +##坡 +##寿 +##勤 +##磨 +##妹 +##瑞 +##缓 +##轴 +##麦 +##羽 +##咨 +##凝 +##默 +##驻 +##敢 +##债 +##浮 +##幻 +##株 +##浅 +##敬 +##敏 +##陷 +##凤 +##坛 +##虎 +##乌 +##铜 +##御 +##乳 +##讯 +##循 +##圈 +##肌 +##妙 +##奋 +##忘 +##闭 +##墓 +##汤 +##忠 +##跨 +##怕 +##振 +##宾 +##跑 +##屏 +##坦 +##粗 +##租 +##悲 +##伟 +##拜 +##妻 +##赞 +##兄 +##宿 +##碑 +##貌 +##勒 +##罚 +##夺 +##偶 +##截 +##纤 +##齿 +##郑 +##聘 +##偿 +##扶 +##豪 +##慧 +##跳 +##疏 +##莱 +##腐 +##插 +##恐 +##郎 +##辞 +##挂 
+##娘 +##肿 +##徒 +##伏 +##磁 +##杯 +##丛 +##旨 +##琴 +##炮 +##醒 +##砖 +##替 +##辛 +##暖 +##锁 +##杜 +##肠 +##孤 +##饭 +##脸 +##邮 +##贷 +##俄 +##毁 +##荷 +##谐 +##荒 +##肝 +##链 +##尺 +##尘 +##援 +##疫 +##崇 +##恢 +##扎 +##伸 +##幽 +##抵 +##胸 +##谱 +##舒 +##迫 +##畅 +##泡 +##岭 +##喷 +##窗 +##捷 +##宏 +##肯 +##狂 +##铺 +##骑 +##抽 +##券 +##俱 +##徽 +##胆 +##碎 +##邀 +##褐 +##斤 +##涂 +##赋 +##署 +##颗 +##渠 +##仿 +##迪 +##炉 +##辉 +##涵 +##耗 +##返 +##邻 +##斑 +##董 +##魏 +##午 +##娱 +##浴 +##尿 +##曼 +##锅 +##柳 +##舰 +##搭 +##旁 +##宅 +##趋 +##凉 +##赢 +##伙 +##爷 +##廷 +##戴 +##壤 +##奶 +##页 +##玄 +##驾 +##阔 +##轨 +##朗 +##捕 +##肾 +##稿 +##惯 +##侯 +##乙 +##渡 +##稍 +##恨 +##脏 +##姆 +##腔 +##抱 +##杆 +##垂 +##赴 +##赶 +##莲 +##辽 +##荐 +##旦 +##妖 +##稀 +##驱 +##沈 +##役 +##晓 +##亭 +##仲 +##澳 +##炸 +##绪 +##陕 +##恒 +##堡 +##纠 +##仇 +##懂 +##焦 +##搜 +##忍 +##贤 +##添 +##艾 +##赤 +##犹 +##尝 +##锦 +##稻 +##撰 +##填 +##衰 +##栽 +##邪 +##粘 +##跃 +##桌 +##胃 +##悬 +##翼 +##彼 +##睡 +##曹 +##刷 +##摆 +##悉 +##锋 +##摇 +##抢 +##乏 +##廉 +##鼠 +##盾 +##瓷 +##抑 +##埃 +##邦 +##遂 +##寸 +##渔 +##祥 +##胎 +##牵 +##壳 +##甜 +##卓 +##瓜 +##袭 +##遵 +##巡 +##逆 +##玛 +##韵 +##桑 +##酷 +##赖 +##桂 +##郡 +##肃 +##仓 +##寄 +##塘 +##瘤 +##碳 +##搞 +##燕 +##蒸 +##允 +##忽 +##斜 +##穷 +##郁 +##囊 +##奔 +##昆 +##盆 +##愈 +##递 +##黎 +##祭 +##怒 +##辈 +##腺 +##滚 +##暂 +##郭 +##璃 +##踪 +##芳 +##碍 +##肺 +##狱 +##冒 +##阁 +##砂 +##苍 +##揭 +##踏 +##颇 +##柄 +##闪 +##孝 +##葡 +##腾 +##茎 +##鸣 +##撤 +##仰 +##伐 +##丘 +##於 +##泪 +##荡 +##扰 +##纲 +##拼 +##欣 +##纽 +##癌 +##堆 +##菲 +##披 +##挖 +##寓 +##履 +##捐 +##悟 +##乾 +##嘴 +##钻 +##拳 +##吹 +##柏 +##遥 +##抚 +##忧 +##赠 +##霸 +##艰 +##淋 +##猫 +##帅 +##奈 +##寨 +##滴 +##鼻 +##掘 +##狗 +##驶 +##朴 +##拆 +##惜 +##玻 +##扣 +##萄 +##蔬 +##宠 +##缴 +##赫 +##凯 +##滨 +##乔 +##腰 +##葬 +##孟 +##吾 +##枚 +##圳 +##忙 +##扫 +##杭 +##凌 +##梯 +##丈 +##隶 +##剪 +##盗 +##擅 +##疆 +##弯 +##携 +##拒 +##秒 +##颁 +##醇 +##割 +##浆 +##姑 +##爸 +##螺 +##穗 +##缝 +##慈 +##喝 +##瓶 +##漏 +##悠 +##猎 +##番 +##孕 +##伪 +##漂 +##腿 +##吐 +##坝 +##滤 +##函 +##匀 +##偷 +##浩 +##矛 +##僧 +##辨 +##俊 +##棉 +##铸 +##诞 +##丧 +##夹 +##姿 +##睛 +##淮 +##阀 +##姜 +##尸 +##猛 +##芽 +##账 +##旱 +##醉 +##弄 +##坊 +##烤 +##萧 +##矣 +##雾 +##倡 +##榜 +##弗 +##氨 +##朵 +##锡 +##袋 +##拨 +##湘 +##岳 +##烦 +##肩 +##熙 +##炭 +##婆 +##棋 +##禅 +##穴 +##宙 +##汗 +##艳 +##儒 +##叙 +##晨 +##颈 +##峡 +##拖 +##烂 +##茂 +##戒 +##飘 +##氛 +##蒂 +##撞 +##瓣 +##箭 +##叛 +##鞋 +##劲 +##祝 +##娜 +##饲 +##侍 +##诱 +##叹 +##卢 +##弥 +##鼎 +##厦 +##屈 +##慕 +##魅 +##厨 +##嫁 +##绵 +##逼 +##扮 +##叔 +##酶 +##燥 +##狼 +##滋 +##汁 +##辐 +##怨 +##翅 +##佩 +##坑 +##旬 +##沃 +##剩 +##蛇 +##颖 +##篮 +##锐 +##侠 +##匹 +##唤 +##熊 +##漠 +##迟 +##敦 +##雌 +##谨 +##婴 +##浸 +##磷 +##筒 +##滩 +##埋 +##框 +##弘 +##吕 +##碰 +##纺 +##硫 +##堪 +##契 +##蜜 +##蓄 +##阐 +##傲 +##碱 +##晰 +##狭 +##撑 +##叉 +##卧 +##劫 +##闹 +##赐 +##邓 +##奴 +##溉 +##浦 +##蹈 +##辣 +##遣 +##耀 +##耶 +##翠 +##叠 +##迈 +##霍 +##碧 +##恰 +##脊 +##昭 +##摸 +##饱 +##赔 +##泄 +##哭 +##讼 +##逝 +##逻 +##廊 +##擦 +##渗 +##彰 +##卿 +##旺 +##宪 +##顷 +##妆 +##陪 +##葛 +##仔 +##淀 +##翰 +##悦 +##穆 +##煮 +##辩 +##弦 +##串 +##押 +##蚀 +##逢 +##贺 +##焊 +##煌 +##缔 +##惑 +##鹿 +##袁 +##糊 +##逸 +##舟 +##勃 +##侦 +##涯 +##蔡 +##辟 +##涌 +##枯 +##痕 +##疼 +##莉 +##柴 +##眉 +##罢 +##催 +##衔 +##秉 +##妃 +##鸿 +##傅 +##辰 +##聪 +##咸 +##扇 +##盈 +##勘 +##佐 +##泊 +##抛 +##搬 +##牢 +##宴 +##牲 +##贾 +##摘 +##姻 +##慎 +##帕 +##忌 +##卒 +##夕 +##卜 +##惟 +##挺 +##崖 +##炒 +##爵 +##冻 +##椒 +##鳞 +##祸 +##潭 +##腊 +##蒋 +##缠 +##寂 +##眠 +##冯 +##芯 +##槽 +##吊 +##聊 +##梗 +##嫩 +##凶 +##铭 +##爽 +##筋 +##韦 +##脾 +##铝 +##肢 +##栋 +##勾 +##萌 +##渊 +##掩 +##狮 +##撒 +##漆 +##骗 +##禽 +##蕴 +##坪 +##洒 +##冶 +##兹 +##椭 +##喻 +##泵 +##哀 +##翔 +##棒 +##芝 +##扑 +##毅 +##衍 +##惨 +##疯 +##欺 +##贼 +##肖 +##轰 +##巢 +##臂 +##轩 +##扁 +##淘 +##犬 +##宰 +##祠 +##挡 +##厌 +##帐 +##蜂 +##狐 +##垃 +##昂 +##圾 +##秩 +##芬 +##瞬 +##枢 +##舌 +##唇 +##棕 +##霞 +##霜 +##艇 +##侨 +##鹤 +##硅 +##靖 +##哦 +##削 +##泌 +##奠 +##吏 +##夷 +##咖 +##彭 +##窑 +##胁 +##肪 +##贞 +##劝 +##钙 +##柜 +##鸭 +##庞 +##兔 +##荆 +##丙 +##纱 +##戈 +##藤 +##矩 +##泳 +##惧 +##铃 +##渴 
+##胀 +##袖 +##丸 +##狠 +##豫 +##茫 +##浇 +##菩 +##氯 +##啡 +##葱 +##梨 +##霉 +##脆 +##氢 +##巷 +##丑 +##娃 +##锻 +##愤 +##贪 +##蝶 +##厉 +##闽 +##浑 +##斩 +##栖 +##茅 +##昏 +##龟 +##碗 +##棚 +##滞 +##慰 +##斋 +##虹 +##屯 +##萝 +##饼 +##窄 +##潘 +##绣 +##丢 +##芦 +##鳍 +##裕 +##誓 +##腻 +##锈 +##吞 +##蜀 +##啦 +##扭 +##巩 +##髓 +##劣 +##拌 +##谊 +##涛 +##勋 +##郊 +##莎 +##痴 +##窝 +##驰 +##跌 +##笼 +##挤 +##溢 +##隙 +##鹰 +##诏 +##帽 +##芒 +##爬 +##凸 +##牺 +##熔 +##吻 +##竭 +##瘦 +##冥 +##搏 +##屡 +##昔 +##萼 +##愁 +##捉 +##翁 +##怖 +##汪 +##烯 +##疲 +##缸 +##溃 +##泼 +##剖 +##涨 +##橡 +##谜 +##悔 +##嫌 +##盒 +##苯 +##凹 +##绳 +##畏 +##罐 +##虾 +##柯 +##邑 +##馨 +##兆 +##帖 +##陌 +##禄 +##垫 +##壶 +##逊 +##骤 +##祀 +##晴 +##蓬 +##苞 +##煎 +##菊 +##堤 +##甫 +##拱 +##氮 +##罕 +##舶 +##伞 +##姚 +##弓 +##嵌 +##馈 +##琼 +##噪 +##雀 +##呵 +##汝 +##焉 +##陀 +##胺 +##惩 +##沼 +##枣 +##桐 +##酱 +##遮 +##孢 +##钝 +##呀 +##锥 +##妥 +##酿 +##巫 +##闯 +##沧 +##崩 +##蕊 +##酬 +##匠 +##躲 +##喊 +##琳 +##绎 +##喉 +##凰 +##抬 +##膨 +##盲 +##剥 +##喂 +##庸 +##奸 +##钩 +##冈 +##募 +##苑 +##杏 +##杉 +##辱 +##隋 +##薪 +##绒 +##欠 +##尉 +##攀 +##抹 +##巾 +##渣 +##苹 +##猴 +##悄 +##屠 +##颂 +##湛 +##魄 +##颠 +##呆 +##粤 +##岂 +##娇 +##暑 +##鹅 +##筛 +##膏 +##樱 +##缆 +##襄 +##瑟 +##恭 +##泻 +##匪 +##兮 +##恼 +##吟 +##仕 +##蔽 +##骄 +##蚕 +##斥 +##椅 +##姬 +##谦 +##椎 +##搅 +##卸 +##沫 +##怜 +##坎 +##瑰 +##钦 +##拾 +##厕 +##後 +##逾 +##薯 +##衬 +##钾 +##崔 +##稽 +##蛮 +##殷 +##晒 +##菇 +##臭 +##弧 +##擎 +##粹 +##纬 +##焰 +##玲 +##竣 +##咒 +##歇 +##糕 +##诵 +##茨 +##妮 +##酯 +##麟 +##卑 +##浏 +##咽 +##罩 +##舱 +##酵 +##晕 +##顽 +##赁 +##咬 +##枫 +##冀 +##贮 +##艘 +##亏 +##薛 +##瀑 +##篆 +##膀 +##沸 +##雍 +##咳 +##尹 +##愉 +##烹 +##坠 +##勿 +##钠 +##坤 +##甸 +##墅 +##闸 +##藻 +##韧 +##鄂 +##瑶 +##舆 +##夸 +##蕾 +##栗 +##咏 +##丞 +##抄 +##鹏 +##弊 +##檐 +##骂 +##仆 +##峻 +##爪 +##赚 +##帆 +##娶 +##嘛 +##钓 +##澄 +##猜 +##裔 +##抒 +##铅 +##卉 +##彦 +##删 +##衷 +##禹 +##寡 +##蒲 +##砌 +##棱 +##拘 +##堵 +##雁 +##仄 +##荫 +##祈 +##奢 +##赌 +##寇 +##隧 +##摊 +##雇 +##卦 +##婉 +##敲 +##挣 +##皱 +##虞 +##亨 +##懈 +##挽 +##珊 +##饶 +##滥 +##锯 +##闷 +##酮 +##虐 +##兑 +##僵 +##傻 +##沦 +##巅 +##鞭 +##梳 +##赣 +##锌 +##庐 +##薇 +##庵 +##慨 +##肚 +##妄 +##仗 +##绑 +##枕 +##牡 +##胖 +##沪 +##垒 +##捞 +##捧 +##竖 +##蜡 +##桩 +##厢 +##孵 +##黏 +##拯 +##谭 +##诈 +##灿 +##釉 +##裹 +##钮 +##俩 +##灶 +##彝 +##蟹 +##涩 +##醋 +##匙 +##歧 +##刹 +##玫 +##棘 +##橙 +##凑 +##桶 +##刃 +##伽 +##硝 +##怡 +##籽 +##敞 +##淳 +##矮 +##镶 +##戚 +##幢 +##涡 +##尧 +##膝 +##哉 +##肆 +##畔 +##溯 +##媚 +##烘 +##窃 +##焚 +##澜 +##愚 +##棵 +##乞 +##佑 +##暨 +##敷 +##饥 +##俯 +##蔓 +##暮 +##砍 +##邵 +##仑 +##毗 +##剿 +##馀 +##锤 +##刮 +##梭 +##摧 +##掠 +##躯 +##诡 +##匈 +##侣 +##胚 +##疮 +##裙 +##裸 +##塌 +##吓 +##俘 +##糙 +##藩 +##楷 +##羞 +##鲍 +##帘 +##裤 +##宛 +##憾 +##桓 +##痰 +##寞 +##骚 +##惹 +##笋 +##萃 +##栓 +##挫 +##矢 +##垦 +##垄 +##绸 +##凄 +##镀 +##熏 +##钉 +##粪 +##缅 +##洽 +##鞘 +##蔗 +##迄 +##沐 +##凿 +##勉 +##昨 +##喘 +##爹 +##屑 +##耻 +##沥 +##庶 +##涅 +##腕 +##袍 +##懒 +##阜 +##嗜 +##朔 +##蒜 +##沛 +##坟 +##轿 +##喀 +##笛 +##狄 +##饿 +##蓉 +##泣 +##窟 +##豹 +##屿 +##崛 +##迦 +##诠 +##贬 +##腥 +##钥 +##嗣 +##瑜 +##倦 +##萎 +##拦 +##冤 +##讽 +##潇 +##谣 +##趁 +##妨 +##贩 +##萍 +##窦 +##纂 +##缀 +##矫 +##淑 +##墩 +##梵 +##沾 +##淫 +##乖 +##汰 +##莞 +##旷 +##浊 +##挚 +##撼 +##氟 +##焕 +##庚 +##掀 +##诀 +##盼 +##疹 +##窖 +##匆 +##厥 +##轧 +##淹 +##亥 +##鸦 +##棍 +##谅 +##歼 +##汕 +##挪 +##蚁 +##敛 +##魁 +##畴 +##炫 +##丫 +##奎 +##菱 +##沂 +##撕 +##阎 +##詹 +##蛛 +##靡 +##瞻 +##咱 +##愧 +##烷 +##畸 +##灸 +##眸 +##觅 +##芜 +##廓 +##斌 +##躁 +##麓 +##摔 +##烛 +##睹 +##孜 +##缚 +##堕 +##昼 +##睿 +##琪 +##琉 +##贱 +##渝 +##跋 +##茄 +##舜 +##诛 +##捣 +##芙 +##倚 +##酰 +##澈 +##慌 +##帜 +##颤 +##陇 +##颌 +##昧 +##佣 +##眷 +##徙 +##禾 +##逮 +##莹 +##碟 +##梢 +##朽 +##粥 +##喇 +##榆 +##驳 +##楔 +##啸 +##肋 +##踢 +##傍 +##桔 +##肴 +##呕 +##旭 +##埠 +##贿 +##曝 +##杖 +##俭 +##栩 +##斧 +##镁 +##匾 +##踩 +##橘 +##颅 +##囚 +##蛙 +##膳 +##坞 +##琐 +##荧 +##瘟 +##涤 +##胰 +##衫 +##噬 +##皖 +##邱 +##埔 +##汀 +##羡 +##睐 +##葵 +##耿 +##糟 +##厄 +##秧 +##黔 +##蹄 +##漳 +##鞍 +##谏 +##腋 +##簇 +##梧 +##戎 +##榴 +##诣 +##宦 +##苔 +##揽 +##簧 +##狸 +##阙 +##扯 
+##耍 +##棠 +##脓 +##烫 +##翘 +##芭 +##躺 +##羁 +##藉 +##拐 +##陡 +##漓 +##棺 +##钧 +##琅 +##扔 +##寝 +##绚 +##熬 +##驿 +##邹 +##杠 +##绥 +##窥 +##晃 +##渭 +##樊 +##鑫 +##祁 +##陋 +##哺 +##堰 +##祛 +##梓 +##崎 +##孽 +##蝴 +##蔚 +##抖 +##苟 +##肇 +##溜 +##绅 +##妾 +##跪 +##沁 +##莽 +##虏 +##瞄 +##砸 +##稚 +##僚 +##崭 +##迭 +##皂 +##彬 +##雏 +##羲 +##缕 +##绞 +##俞 +##簿 +##耸 +##廖 +##嘲 +##翌 +##榄 +##裴 +##槐 +##洼 +##睁 +##灼 +##啤 +##臀 +##啥 +##濒 +##醛 +##峨 +##葫 +##悍 +##笨 +##嘱 +##稠 +##韶 +##陛 +##峭 +##酚 +##翩 +##舅 +##寅 +##蕉 +##阮 +##垣 +##戮 +##趾 +##犀 +##巍 +##霄 +##饪 +##秆 +##朕 +##驼 +##肛 +##揉 +##楠 +##岚 +##疡 +##帧 +##柑 +##赎 +##逍 +##滇 +##璋 +##礁 +##黛 +##钞 +##邢 +##涧 +##劈 +##瞳 +##砚 +##驴 +##锣 +##恳 +##栅 +##吵 +##牟 +##沌 +##瞩 +##咪 +##毯 +##炳 +##淤 +##盯 +##芋 +##粟 +##栈 +##戊 +##盏 +##峪 +##拂 +##暇 +##酥 +##汛 +##嚣 +##轼 +##妒 +##匿 +##鸽 +##蝉 +##痒 +##宵 +##瘫 +##璧 +##汲 +##冢 +##碌 +##琢 +##磅 +##卤 +##剔 +##谎 +##圩 +##酌 +##捏 +##渺 +##媳 +##穹 +##谥 +##骏 +##哨 +##骆 +##乒 +##摹 +##兜 +##柿 +##喧 +##呜 +##捡 +##橄 +##逗 +##瑚 +##呐 +##檀 +##辜 +##妊 +##祯 +##苷 +##衙 +##笃 +##芸 +##霖 +##荔 +##闺 +##羌 +##芹 +##哼 +##糯 +##吼 +##蕃 +##嵩 +##矶 +##绽 +##坯 +##娠 +##祷 +##锰 +##瘀 +##岐 +##茵 +##筝 +##斐 +##肽 +##歉 +##嗽 +##恤 +##汶 +##聂 +##樟 +##擒 +##鹃 +##拙 +##鲤 +##絮 +##鄙 +##彪 +##嗓 +##墟 +##骼 +##渤 +##僻 +##豁 +##谕 +##荟 +##姨 +##婷 +##挠 +##哇 +##炙 +##诅 +##娥 +##哑 +##阱 +##嫉 +##圭 +##乓 +##橱 +##歪 +##禧 +##甩 +##坷 +##晏 +##驯 +##讳 +##泗 +##煞 +##淄 +##倪 +##妓 +##窍 +##竿 +##襟 +##匡 +##钛 +##侈 +##侄 +##铲 +##哮 +##厩 +##亢 +##辕 +##瘾 +##辊 +##狩 +##掷 +##潍 +##伺 +##嘿 +##弈 +##嘎 +##陨 +##娅 +##昊 +##犁 +##屁 +##蜘 +##寥 +##滕 +##毙 +##涝 +##谛 +##郝 +##痹 +##溺 +##汾 +##脐 +##馅 +##蠢 +##珀 +##腌 +##扼 +##敕 +##莓 +##峦 +##铬 +##谍 +##炬 +##龚 +##麒 +##睦 +##磺 +##吁 +##掺 +##烁 +##靶 +##圃 +##饵 +##褶 +##娟 +##滔 +##挨 +##褒 +##胱 +##晖 +##脖 +##垢 +##抉 +##冉 +##茧 +##渲 +##癫 +##悼 +##嫂 +##瞒 +##纶 +##肘 +##炖 +##瀚 +##皋 +##姊 +##颐 +##俏 +##颊 +##讶 +##札 +##奕 +##磊 +##镖 +##遐 +##眺 +##腑 +##琦 +##蚊 +##窜 +##渍 +##嗯 +##夯 +##笙 +##蘑 +##翡 +##碘 +##卯 +##啼 +##靓 +##辍 +##莺 +##躬 +##猿 +##杞 +##眩 +##虔 +##凋 +##遁 +##泾 +##岔 +##羟 +##弛 +##娄 +##茸 +##皓 +##峙 +##逅 +##邂 +##苇 +##楹 +##蹲 +##拢 +##甄 +##鳃 +##邯 +##捆 +##勺 +##酉 +##荚 +##唑 +##臻 +##辗 +##绰 +##徊 +##榨 +##苛 +##赦 +##盔 +##壬 +##恍 +##缉 +##熨 +##澡 +##桨 +##匣 +##兢 +##驭 +##镍 +##孰 +##绮 +##馏 +##蝇 +##佼 +##鲸 +##哎 +##裳 +##蜕 +##嚼 +##嘻 +##庇 +##绢 +##倩 +##钵 +##恪 +##帷 +##莆 +##柠 +##藕 +##砾 +##绊 +##喙 +##坂 +##徘 +##荀 +##瞧 +##蛾 +##晦 +##铎 +##紊 +##锚 +##酪 +##稷 +##聋 +##闵 +##熹 +##冕 +##诫 +##珑 +##曦 +##篷 +##迥 +##蘖 +##胤 +##檬 +##瑾 +##钳 +##遏 +##辄 +##嬉 +##隅 +##秃 +##帛 +##聆 +##芥 +##诬 +##挟 +##宕 +##鹊 +##琶 +##膛 +##兀 +##懿 +##碾 +##叮 +##蠕 +##譬 +##缮 +##烽 +##妍 +##榕 +##邃 +##焙 +##倘 +##戌 +##茹 +##豚 +##晾 +##浒 +##玺 +##醚 +##祐 +##炽 +##缪 +##凛 +##噩 +##溅 +##毋 +##槛 +##嫡 +##蝠 +##娴 +##稣 +##禀 +##壑 +##殆 +##敖 +##倭 +##挛 +##侃 +##蚌 +##咀 +##盎 +##殉 +##岑 +##浚 +##谬 +##狡 +##癸 +##逛 +##耽 +##俺 +##璨 +##巳 +##茜 +##郸 +##蒴 +##琵 +##叩 +##泸 +##塾 +##稼 +##侮 +##锂 +##曙 +##薰 +##婿 +##惶 +##拭 +##篱 +##恬 +##淌 +##烙 +##袜 +##徵 +##慷 +##夭 +##噶 +##莘 +##鸳 +##殡 +##蚂 +##憎 +##喃 +##佚 +##龛 +##潢 +##烃 +##岱 +##潺 +##衢 +##璀 +##鹭 +##揣 +##痢 +##厮 +##氓 +##怠 +##痘 +##硒 +##镌 +##乍 +##咯 +##惬 +##桦 +##骇 +##枉 +##蜗 +##睾 +##淇 +##耘 +##娓 +##弼 +##鳌 +##嗅 +##狙 +##箫 +##朦 +##椰 +##胥 +##丐 +##陂 +##唾 +##鳄 +##柚 +##谒 +##戍 +##刁 +##鸾 +##缭 +##骸 +##铣 +##酋 +##蝎 +##掏 +##耦 +##怯 +##娲 +##拇 +##汹 +##胧 +##疤 +##硼 +##恕 +##哗 +##眶 +##痫 +##凳 +##鲨 +##擢 +##歹 +##樵 +##瘠 +##茗 +##翟 +##黯 +##蜒 +##壹 +##殇 +##伶 +##辙 +##瑕 +##町 +##孚 +##痉 +##铵 +##搁 +##漾 +##戟 +##镰 +##鸯 +##猩 +##蔷 +##缤 +##叭 +##垩 +##曳 +##奚 +##毓 +##颓 +##汐 +##靴 +##傣 +##尬 +##濮 +##赂 +##媛 +##懦 +##扦 +##韬 +##戳 +##雯 +##蜿 +##笺 +##裘 +##尴 +##侗 +##钨 +##苓 +##寰 +##蛊 +##扳 +##搓 +##涟 +##睫 +##淬 +##赈 +##恺 +##瞎 +##蝙 +##枸 +##萱 +##颚 +##憩 +##秽 +##秸 +##拷 +##阑 +##貂 +##粱 +##煲 +##隘 +##暧 +##惕 +##沽 +##菠 +##趟 +##磋 +##偕 +##涕 +##邸 +##踞 +##惫 +##阪 +##鞠 +##饺 +##汞 
+##颍 +##氰 +##屹 +##蛟 +##跻 +##哟 +##臼 +##熄 +##绛 +##弩 +##褪 +##渎 +##亟 +##匮 +##撇 +##霆 +##攒 +##舵 +##扛 +##彤 +##蛤 +##婢 +##偃 +##胫 +##姥 +##睑 +##诙 +##诲 +##锭 +##悚 +##扒 +##洱 +##劾 +##惰 +##篡 +##瓯 +##徇 +##铀 +##骋 +##筷 +##渚 +##踵 +##俨 +##榻 +##糜 +##捻 +##釜 +##哩 +##萤 +##蛹 +##隽 +##垮 +##鸠 +##鸥 +##漕 +##瑙 +##礴 +##憧 +##殴 +##潼 +##悯 +##砺 +##拽 +##钗 +##酣 +##镂 +##膺 +##楞 +##竺 +##迂 +##嫣 +##忱 +##哄 +##疣 +##鹦 +##枭 +##憬 +##疱 +##婪 +##沮 +##怅 +##筱 +##扉 +##瞰 +##旌 +##蔑 +##铠 +##瀛 +##琥 +##懵 +##谴 +##捍 +##蟾 +##漩 +##拣 +##汴 +##刨 +##叱 +##曜 +##妞 +##澎 +##镑 +##翎 +##瞪 +##倔 +##芍 +##璞 +##瓮 +##驹 +##芷 +##寐 +##擂 +##丕 +##蟠 +##诃 +##悸 +##亘 +##溴 +##宸 +##廿 +##恃 +##棣 +##荼 +##筠 +##羚 +##慑 +##唉 +##纣 +##麼 +##蹦 +##锄 +##淆 +##甙 +##蚜 +##椿 +##禺 +##绯 +##冗 +##葩 +##厝 +##媲 +##蒿 +##痪 +##菁 +##炊 +##俑 +##讥 +##桀 +##祺 +##吡 +##迩 +##箔 +##皿 +##缎 +##萦 +##剃 +##霓 +##酝 +##诰 +##茉 +##飙 +##湍 +##蜥 +##箕 +##蘸 +##柬 +##韭 +##溥 +##熠 +##鹉 +##咐 +##剌 +##悖 +##瞿 +##槟 +##娩 +##闾 +##遴 +##咫 +##孺 +##彷 +##茬 +##蓟 +##憨 +##袅 +##佬 +##炯 +##啶 +##昙 +##蚩 +##痔 +##蕨 +##瓢 +##夔 +##毡 +##赃 +##鳖 +##沅 +##饷 +##臧 +##掖 +##褚 +##羹 +##勐 +##谚 +##畦 +##眨 +##贻 +##攸 +##涎 +##弑 +##咎 +##铂 +##瑛 +##矗 +##虱 +##秤 +##谟 +##漱 +##俸 +##夙 +##雉 +##螨 +##恣 +##斛 +##谙 +##隍 +##奄 +##壕 +##髻 +##鄱 +##嘶 +##磕 +##濡 +##赘 +##荞 +##讹 +##猕 +##痞 +##鬓 +##铮 +##腱 +##幡 +##榭 +##爻 +##涓 +##晤 +##咕 +##惭 +##钼 +##匕 +##撮 +##庾 +##笠 +##窘 +##癖 +##垛 +##窒 +##畲 +##甬 +##彗 +##缨 +##湮 +##寮 +##衅 +##谪 +##绫 +##兖 +##疽 +##磐 +##菏 +##沱 +##骁 +##嫔 +##盂 +##娆 +##钊 +##蟒 +##忏 +##谤 +##晟 +##痈 +##耆 +##谧 +##簪 +##疟 +##扈 +##脍 +##琛 +##咋 +##胄 +##葆 +##轶 +##桢 +##攘 +##邕 +##拧 +##茯 +##摒 +##傀 +##祚 +##嘟 +##帼 +##筵 +##馒 +##疚 +##璇 +##砧 +##槃 +##犷 +##腓 +##煜 +##弋 +##疸 +##濑 +##麝 +##嗟 +##忻 +##愣 +##斓 +##吝 +##咧 +##矾 +##愫 +##漪 +##珂 +##逞 +##糠 +##璐 +##藓 +##昕 +##妩 +##屌 +##疵 +##嘘 +##袂 +##稃 +##剁 +##侏 +##掐 +##猾 +##匍 +##坳 +##黜 +##邺 +##闫 +##猥 +##湃 +##斟 +##癣 +##匐 +##粳 +##叟 +##俾 +##儡 +##莒 +##骥 +##跤 +##耙 +##矜 +##翱 +##赡 +##浣 +##栾 +##拈 +##螟 +##桧 +##坍 +##睢 +##趴 +##伎 +##婺 +##霹 +##痊 +##膊 +##眯 +##豌 +##驮 +##骈 +##嶂 +##淞 +##腮 +##髅 +##炀 +##啄 +##亳 +##麾 +##筐 +##叨 +##徨 +##跷 +##楂 +##郴 +##绶 +##羔 +##咤 +##靳 +##屎 +##雳 +##瘘 +##蹬 +##惮 +##涪 +##阖 +##煽 +##蹊 +##栉 +##俟 +##涸 +##辫 +##锢 +##佟 +##皎 +##啮 +##钰 +##螂 +##啪 +##绷 +##闰 +##畿 +##覃 +##惘 +##贰 +##碉 +##卞 +##酐 +##枷 +##葺 +##芪 +##蕙 +##咚 +##籁 +##钴 +##冽 +##玮 +##骷 +##啃 +##焖 +##猝 +##榈 +##滁 +##拮 +##跗 +##讷 +##蝗 +##蠡 +##烨 +##脯 +##歙 +##泠 +##刍 +##掳 +##僳 +##螯 +##胳 +##髦 +##粽 +##戾 +##祜 +##岷 +##懋 +##馥 +##昵 +##踊 +##湄 +##郢 +##斡 +##迢 +##嗪 +##裨 +##羧 +##膈 +##翊 +##鲫 +##螃 +##沓 +##疝 +##笈 +##榔 +##诘 +##颉 +##蛀 +##鸢 +##焯 +##囧 +##梆 +##潞 +##戛 +##佗 +##艮 +##霾 +##鬟 +##玖 +##腭 +##喔 +##罔 +##佥 +##粑 +##舷 +##泯 +##泓 +##炜 +##谗 +##烬 +##跆 +##傩 +##飓 +##浔 +##钤 +##惚 +##胭 +##踝 +##镯 +##臆 +##蜚 +##揪 +##觞 +##皈 +##迸 +##匝 +##筏 +##醴 +##黍 +##洮 +##滦 +##侬 +##甾 +##澧 +##阈 +##袱 +##迤 +##衮 +##濂 +##娑 +##砥 +##砷 +##铨 +##缜 +##箴 +##逵 +##猖 +##蛰 +##箍 +##侥 +##搂 +##纨 +##裱 +##枋 +##嫦 +##敝 +##挝 +##贲 +##潦 +##撩 +##惺 +##铰 +##忒 +##咆 +##哆 +##莅 +##炕 +##抨 +##涿 +##龈 +##猷 +##遒 +##缥 +##捂 +##俐 +##瘙 +##搐 +##牍 +##馍 +##痿 +##袤 +##峥 +##栎 +##罹 +##燎 +##喵 +##璜 +##飒 +##蔼 +##珞 +##澹 +##奘 +##岖 +##芡 +##簸 +##杵 +##甥 +##骊 +##悴 +##惆 +##殃 +##呃 +##祗 +##髋 +##幔 +##榛 +##犊 +##霁 +##芮 +##牒 +##佰 +##狈 +##薨 +##吩 +##鳝 +##嵘 +##濠 +##呤 +##纫 +##檄 +##浜 +##缙 +##缢 +##煦 +##蓦 +##揖 +##拴 +##缈 +##褥 +##铿 +##燮 +##锵 +##荥 +##忿 +##僖 +##婶 +##芾 +##镐 +##痣 +##眈 +##祇 +##邈 +##翳 +##碣 +##遨 +##鳗 +##诂 +##岫 +##焘 +##茱 +##洵 +##晁 +##噢 +##偈 +##旖 +##蚓 +##柘 +##珐 +##遽 +##岌 +##桅 +##唔 +##鄞 +##雹 +##驸 +##苻 +##恻 +##鬃 +##玑 +##磬 +##崂 +##祉 +##荤 +##淼 +##肱 +##呗 +##骡 +##囱 +##佞 +##耒 +##伫 +##嚷 +##粼 +##歆 +##佃 +##旎 +##惋 +##殁 +##杳 +##阡 +##畈 +##蔺 +##巽 +##昱 +##啰 +##吠 +##嗔 +##涮 +##奂 +##撷 +##袒 +##爰 +##捶 +##赭 +##蜓 +##姗 +##蔻 +##垠 +##噻 +##峒 +##皙 +##憔 +##帚 +##杷 +##蟆 +##觐 +##钒 
+##岙 +##栀 +##幄 +##啧 +##癜 +##擀 +##轲 +##铆 +##讴 +##樽 +##霏 +##肮 +##枳 +##骞 +##诧 +##瘢 +##虬 +##拗 +##蕲 +##茁 +##唆 +##沭 +##毂 +##蛎 +##芊 +##銮 +##瞥 +##呱 +##羿 +##吒 +##傥 +##髯 +##濯 +##蜻 +##皴 +##邳 +##燧 +##獭 +##垭 +##祟 +##虢 +##枇 +##鹫 +##颞 +##皑 +##脲 +##舔 +##魇 +##霭 +##坨 +##郧 +##椽 +##舫 +##荠 +##琊 +##溟 +##煨 +##谯 +##粲 +##罂 +##屉 +##佯 +##郦 +##亵 +##诽 +##芩 +##嵇 +##蚤 +##哒 +##啬 +##嚎 +##玥 +##隼 +##唢 +##铛 +##壅 +##藜 +##吱 +##楣 +##璟 +##锆 +##憋 +##罡 +##咙 +##腈 +##廪 +##堑 +##诩 +##溧 +##鹑 +##讫 +##哌 +##铢 +##蜴 +##稹 +##噜 +##镉 +##愕 +##桁 +##晔 +##琰 +##陲 +##疙 +##崮 +##颛 +##桡 +##钜 +##谑 +##仞 +##咦 +##珪 +##揍 +##鱿 +##阉 +##瘩 +##槌 +##滓 +##茴 +##泮 +##涣 +##柞 +##渥 +##飨 +##孪 +##沔 +##谲 +##桉 +##慵 +##俚 +##跖 +##纭 +##恙 +##佘 +##荃 +##咄 +##鞅 +##叁 +##恽 +##炔 +##萘 +##钺 +##楫 +##塬 +##钡 +##琮 +##苄 +##骰 +##偎 +##粕 +##咔 +##鹄 +##瓒 +##阆 +##捅 +##嬴 +##箨 +##氦 +##倜 +##觊 +##婕 +##锑 +##撬 +##掰 +##嗷 +##饯 +##蓓 +##鼬 +##佤 +##蚯 +##挞 +##臾 +##嶙 +##幂 +##饬 +##闱 +##煅 +##嘧 +##蹭 +##瞭 +##顼 +##箐 +##徉 +##骜 +##嗨 +##邛 +##庑 +##柩 +##饕 +##俎 +##嘌 +##颏 +##椁 +##崧 +##锉 +##籼 +##狞 +##弁 +##羯 +##踹 +##糅 +##砼 +##嫖 +##豉 +##啉 +##榷 +##嘈 +##俪 +##痂 +##儋 +##芎 +##繇 +##蹇 +##诋 +##煸 +##峋 +##淙 +##泱 +##徜 +##汩 +##纥 +##蝼 +##囿 +##暹 +##谆 +##蹂 +##鞣 +##螳 +##馗 +##幺 +##鞑 +##贽 +##漯 +##牦 +##淖 +##囤 +##晗 +##忡 +##懊 +##呋 +##埂 +##鲈 +##阕 +##幌 +##鳅 +##勰 +##萸 +##剽 +##蚝 +##绔 +##辇 +##麋 +##陟 +##宥 +##锺 +##喽 +##淅 +##熵 +##荨 +##忤 +##轭 +##嗦 +##荪 +##骠 +##鹘 +##聿 +##绾 +##诶 +##怆 +##喋 +##恸 +##湟 +##睨 +##翦 +##蜈 +##褂 +##娼 +##羸 +##觎 +##瘁 +##蚣 +##呻 +##昶 +##谶 +##猬 +##荻 +##酗 +##肄 +##躏 +##膑 +##嗡 +##庠 +##崽 +##搪 +##胯 +##铉 +##峤 +##郯 +##藐 +##舂 +##蓼 +##薏 +##窿 +##羣 +##氽 +##徕 +##冼 +##阂 +##欤 +##殒 +##窈 +##脘 +##篝 +##麸 +##砭 +##砰 +##骶 +##豺 +##窠 +##獒 +##腴 +##苕 +##缇 +##骅 +##劭 +##卅 +##揆 +##垅 +##琏 +##镗 +##苜 +##胛 +##珏 +##吮 +##抠 +##搔 +##槎 +##掣 +##琨 +##餮 +##舛 +##痤 +##埭 +##胪 +##喹 +##妲 +##婀 +##帙 +##箩 +##灏 +##霎 +##袄 +##镭 +##蓿 +##墉 +##嵊 +##堇 +##蟋 +##叽 +##钎 +##録 +##郓 +##瘴 +##丶 +##呦 +##邬 +##頫 +##馁 +##鄢 +##蛭 +##愍 +##锲 +##槿 +##珈 +##蜃 +##拎 +##鎏 +##裟 +##沏 +##螭 +##觑 +##墒 +##捺 +##轸 +##榫 +##怔 +##昀 +##泷 +##凫 +##唠 +##狰 +##鲛 +##氐 +##呛 +##绀 +##碛 +##茏 +##盅 +##蟀 +##洙 +##訇 +##蠹 +##棂 +##蚴 +##篾 +##靛 +##暄 +##泞 +##洄 +##赓 +##麽 +##篓 +##孑 +##烩 +##颢 +##钣 +##髂 +##蹴 +##筮 +##蝌 +##醮 +##菖 +##獗 +##岘 +##鼐 +##姣 +##蟑 +##袈 +##葶 +##掬 +##躇 +##鹌 +##踌 +##钹 +##蚪 +##颧 +##鹳 +##鲲 +##驷 +##潴 +##焱 +##稔 +##悌 +##唏 +##苒 +##蹙 +##氩 +##宓 +##綦 +##苎 +##疃 +##攫 +##掾 +##徭 +##舀 +##逶 +##嗤 +##蜷 +##茔 +##疳 +##迳 +##罄 +##瓠 +##讪 +##傈 +##杲 +##灞 +##氲 +##鬲 +##獠 +##柒 +##骧 +##搀 +##珩 +##绦 +##嚏 +##镛 +##喱 +##倏 +##馋 +##茭 +##擘 +##斫 +##怂 +##唧 +##犍 +##谩 +##赊 +##鬻 +##禛 +##圻 +##蹶 +##缄 +##瘿 +##黠 +##甑 +##矸 +##嘀 +##蹼 +##叼 +##旻 +##鹜 +##稗 +##雒 +##赉 +##馔 +##颦 +##颔 +##掇 +##赅 +##桎 +##痧 +##谄 +##孛 +##笆 +##鲶 +##铳 +##龋 +##盱 +##笏 +##窕 +##苴 +##萋 +##辘 +##琬 +##梏 +##蚧 +##镳 +##瞅 +##睬 +##偌 +##鲵 +##惦 +##蜍 +##靼 +##阗 +##菟 +##黝 +##挈 +##嵴 +##剡 +##楸 +##氤 +##呎 +##珲 +##馄 +##滂 +##蹉 +##蓑 +##锷 +##啜 +##婵 +##鬣 +##钿 +##晌 +##蛆 +##隗 +##酞 +##枞 +##戬 +##獾 +##镕 +##饨 +##娣 +##缰 +##邾 +##鹗 +##嗒 +##苋 +##薮 +##棹 +##拄 +##埕 +##勖 +##鹞 +##殚 +##鲢 +##啖 +##沣 +##靥 +##葭 +##诿 +##鸪 +##饴 +##疖 +##抟 +##睽 +##稞 +##吋 +##谀 +##澍 +##杈 +##妤 +##峄 +##漉 +##気 +##咲 +##璘 +##萜 +##僭 +##朐 +##圜 +##癞 +##藿 +##珉 +##陉 +##僮 +##膻 +##薹 +##汊 +##锗 +##昉 +##猗 +##锶 +##跛 +##嘹 +##瓤 +##衄 +##豕 +##吆 +##腆 +##喆 +##莴 +##谌 +##珙 +##疥 +##鲑 +##玷 +##蛔 +##砀 +##谔 +##睥 +##蹑 +##诒 +##逋 +##姝 +##刈 +##婧 +##喳 +##镞 +##铌 +##辎 +##鹧 +##檩 +##扪 +##霰 +##裆 +##嬷 +##刎 +##嵋 +##悱 +##嘤 +##篁 +##荸 +##瞑 +##殓 +##搽 +##橇 +##雎 +##弭 +##獐 +##恿 +##眦 +##铐 +##尕 +##捎 +##诟 +##痨 +##笞 +##趺 +##唬 +##苣 +##啾 +##瘪 +##垸 +##橹 +##濛 +##曷 +##樾 +##汨 +##仟 +##姒 +##怦 +##荏 +##诤 +##苡 +##吭 +##崆 +##氡 +##脩 +##胝 +##钏 +##屐 +##忐 +##彧 +##拚 +##鏖 +##孳 +##忑 +##邝 +##穰 +##摈 +##庖 +##鸵 +##矽 +##鲟 +##発 +##菅 +##圪 +##蹋 +##衾 +##簋 
+##璎 +##噎 +##嬗 +##肼 +##跎 +##滟 +##戦 +##嵬 +##仝 +##惇 +##纾 +##炁 +##闳 +##骐 +##秣 +##眙 +##谘 +##碓 +##疔 +##恂 +##鳕 +##鸱 +##爨 +##镊 +##钯 +##圮 +##楽 +##堀 +##膘 +##噗 +##锹 +##杼 +##酊 +##挎 +##箸 +##郗 +##垌 +##溏 +##蔫 +##偻 +##妫 +##飚 +##辔 +##濬 +##瑄 +##觚 +##铍 +##跚 +##翕 +##煊 +##耄 +##铋 +##篦 +##阇 +##骛 +##莪 +##吲 +##唁 +##箧 +##珅 +##潋 +##迨 +##哽 +##砦 +##缗 +##謇 +##呸 +##垓 +##糍 +##璠 +##妣 +##狎 +##攥 +##闇 +##蛉 +##瑁 +##腼 +##蹒 +##嶷 +##莠 +##沤 +##哚 +##遑 +##跺 +##膦 +##蹿 +##郫 +##玳 +##埚 +##衿 +##醪 +##挹 +##绡 +##汜 +##坩 +##旃 +##鸨 +##翈 +##抡 +##晞 +##盥 +##藁 +##蓖 +##臊 +##羰 +##楝 +##噱 +##饽 +##苌 +##褓 +##佶 +##稜 +##瞠 +##仡 +##伉 +##襁 +##涞 +##蜇 +##抿 +##瑗 +##孱 +##懑 +##淦 +##赝 +##醌 +##缫 +##蠲 +##嚓 +##鲷 +##湫 +##捋 +##咩 +##裏 +##犒 +##墀 +##硐 +##蔸 +##钽 +##麂 +##蒡 +##鼹 +##绻 +##錾 +##仃 +##篙 +##蕤 +##铤 +##槁 +##牖 +##螈 +##俦 +##笄 +##啻 +##対 +##郤 +##闼 +##醺 +##赍 +##檗 +##裾 +##噫 +##掸 +##箓 +##妪 +##乂 +##蝈 +##砻 +##胍 +##蜱 +##聃 +##雠 +##碚 +##椤 +##缯 +##昴 +##缱 +##祎 +##缬 +##铙 +##孀 +##笳 +##蘇 +##愆 +##榉 +##氙 +##燹 +##撂 +##菽 +##箬 +##蛄 +##瘸 +##嬛 +##橐 +##纡 +##刽 +##辂 +##蒯 +##邨 +##赀 +##跸 +##邙 +##黟 +##磴 +##闿 +##垟 +##嵯 +##钚 +##跄 +##潸 +##崴 +##恁 +##楮 +##腧 +##胨 +##芫 +##碴 +##隰 +##杓 +##貉 +##欹 +##侑 +##鳜 +##铄 +##椴 +##昇 +##醍 +##肓 +##缂 +##铡 +##蹠 +##徂 +##豢 +##蒽 +##菡 +##衲 +##阚 +##芗 +##痍 +##玠 +##晷 +##淝 +##鄯 +##糗 +##耨 +##榧 +##胴 +##蕈 +##镬 +##鼾 +##摭 +##鸮 +##恚 +##実 +##砝 +##珣 +##寤 +##埙 +##锏 +##喟 +##蘅 +##骺 +##捭 +##莜 +##缶 +##锟 +##叵 +##炷 +##鲧 +##胼 +##査 +##岬 +##鹂 +##牯 +##珥 +##莼 +##邠 +##眇 +##卟 +##変 +##惴 +##渑 +##蚱 +##瞌 +##瘰 +##佝 +##旸 +##衽 +##郅 +##奁 +##魑 +##缛 +##颙 +##镫 +##簌 +##豇 +##姹 +##邋 +##暝 +##釐 +##洹 +##咿 +##俳 +##蜊 +##醐 +##聩 +##坻 +##毽 +##喾 +##辋 +##倌 +##媪 +##蛳 +##滹 +##哙 +##阊 +##趸 +##祢 +##籀 +##徼 +##訾 +##髁 +##砜 +##撸 +##瓘 +##缁 +##镓 +##縻 +##菀 +##酢 +##桠 +##撵 +##怏 +##渌 +##摞 +##槲 +##浠 +##诜 +##魉 +##韫 +##亓 +##盤 +##瑭 +##魍 +##襞 +##爿 +##浃 +##樯 +##讵 +##揩 +##耋 +##帏 +##崃 +##鸩 +##遢 +##臃 +##粿 +##禳 +##桫 +##髹 +##诳 +##踉 +##郃 +##嗖 +##讧 +##碁 +##湎 +##阏 +##媾 +##様 +##哔 +##舸 +##曩 +##忝 +##峁 +##掂 +##葳 +##鄄 +##谵 +##彊 +##锴 +##郜 +##葖 +##蓇 +##瓴 +##鳟 +##橼 +##鲇 +##邗 +##犄 +##秭 +##槭 +##缵 +##巯 +##龊 +##狍 +##擞 +##瞽 +##栲 +##撅 +##瑀 +##戢 +##朓 +##逖 +##椹 +##洺 +##艏 +##苁 +##滘 +##铧 +##侪 +##豳 +##竦 +##貔 +##圄 +##呷 +##旄 +##遛 +##芈 +##砣 +##桷 +##龌 +##疬 +##缟 +##洌 +##跏 +##蝮 +##菰 +##帑 +##怙 +##豸 +##雩 +##誊 +##臬 +##镣 +##箇 +##踱 +##钍 +##苫 +##蝽 +##浯 +##単 +##亶 +##囹 +##穑 +##佻 +##绌 +##诔 +##鹬 +##髌 +##蒌 +##鳏 +##殄 +##怛 +##筌 +##刳 +##翮 +##卍 +##畹 +##箜 +##燔 +##赳 +##篌 +##窨 +##翥 +##炅 +##钕 +##莳 +##忖 +##戡 +##沢 +##狒 +##圉 +##琯 +##邰 +##苾 +##犸 +##邡 +##郏 +##襦 +##沆 +##玟 +##濉 +##洎 +##莨 +##氘 +##咛 +##佺 +##腩 +##鳔 +##剜 +##秕 +##牝 +##芨 +##関 +##拊 +##竑 +##圹 +##颡 +##摺 +##沩 +##蜉 +##筚 +##愔 +##肟 +##俶 +##堃 +##绉 +##奭 +##罅 +##嗳 +##蜢 +##疠 +##帔 +##髡 +##黥 +##褛 +##柰 +##鏊 +##痼 +##堞 +##嗝 +##娉 +##戕 +##铱 +##耜 +##觥 +##镒 +##呓 +##蒹 +##栱 +##卮 +##琚 +##逦 +##酩 +##蓍 +##虺 +##谠 +##鼋 +##焗 +##褴 +##砒 +##赧 +##蛏 +##蚬 +##瘕 +##顗 +##愠 +##勣 +##飕 +##徳 +##滢 +##琇 +##鳙 +##瞟 +##尻 +##澶 +##荽 +##舐 +##侂 +##黼 +##潟 +##绂 +##瘗 +##蓥 +##竽 +##濞 +##骖 +##偁 +##応 +##锜 +##匏 +##赑 +##讦 +##诨 +##罘 +##巖 +##嫘 +##颀 +##岿 +##虻 +##罴 +##囗 +##溆 +##噤 +##骝 +##咂 +##锛 +##槊 +##啕 +##驽 +##凇 +##籴 +##硖 +##铯 +##怿 +##笥 +##噙 +##倨 +##坭 +##醅 +##滏 +##悻 +##聒 +##枥 +##昺 +##酆 +##簟 +##睇 +##轫 +##溱 +##骢 +##榘 +##珺 +##跹 +##蚶 +##驺 +##饧 +##噼 +##儆 +##氚 +##哧 +##旒 +##鸬 +##夥 +##玦 +##貅 +##揄 +##戗 +##璩 +##剐 +##垴 +##蘼 +##裒 +##躅 +##唳 +##嗑 +##荦 +##霈 +##缦 +##啭 +##隈 +##悫 +##彀 +##悭 +##焓 +##磔 +##蓊 +##郾 +##枧 +##鹚 +##検 +##屃 +##馑 +##嗲 +##铟 +##薤 +##涔 +##樗 +##忾 +##収 +##绺 +##烊 +##螫 +##黩 +##鞫 +##鲠 +##嘭 +##缣 +##蒺 +##黒 +##骘 +##氖 +##镝 +##俅 +##谮 +##屦 +##摁 +##氪 +##蘧 +##伝 +##腠 +##叡 +##鲂 +##続 +##讣 +##耷 +##燊 +##鸷 +##猊 +##囡 +##崤 +##砬 +##湜 +##翚 +##峯 +##鲎 +##蕖 +##鹈 +##凼 +##泫 +##荑 +##黻 +##牂 +##鄣 +##篑 +##髭 +##陬 +##寔 +##疴 +##邽 +##喏 
+##彖 +##彘 +##赟 +##盹 +##诮 +##鸫 +##茕 +##铖 +##闩 +##読 +##鄜 +##漈 +##盍 +##甭 +##愎 +##魃 +##炆 +##鍊 +##蛐 +##薜 +##楯 +##鲀 +##逡 +##嘞 +##侔 +##觇 +##糸 +##踮 +##狷 +##菘 +##寳 +##扃 +##禊 +##喑 +##塍 +##栝 +##瓿 +##廨 +##貘 +##馕 +##僰 +##哏 +##瑷 +##疎 +##蝣 +##怵 +##阃 +##弢 +##镲 +##螅 +##吖 +##碲 +##夼 +##茌 +##嗬 +##靺 +##髀 +##铊 +##谡 +##癔 +##镠 +##巻 +##秾 +##菪 +##赜 +##铈 +##髙 +##鲳 +##珰 +##畋 +##泅 +##鲅 +##泚 +##飏 +##屍 +##仨 +##葚 +##叻 +##咻 +##衩 +##郄 +##蹩 +##嬖 +##踽 +##柽 +##鞨 +##麴 +##薙 +##钇 +##氵 +##垆 +##犟 +##罍 +##経 +##粜 +##焜 +##牀 +##埝 +##洧 +##覧 +##蓣 +##甯 +##蒐 +##馐 +##畑 +##缑 +##礽 +##瞋 +##浍 +##袢 +##桕 +##侩 +##詈 +##戸 +##烝 +##堌 +##伋 +##倬 +##圯 +##碇 +##纰 +##磾 +##泔 +##纮 +##蓁 +##铗 +##弇 +##挲 +##艉 +##鱬 +##泺 +##橛 +##袴 +##韪 +##籓 +##贶 +##棰 +##趵 +##樨 +##傕 +##玕 +##毎 +##繸 +##劵 +##镧 +##秫 +##邶 +##猞 +##廛 +##栌 +##钲 +##镦 +##嘏 +##蝰 +##镏 +##淠 +##荇 +##逄 +##嘅 +##祕 +##瑠 +##炝 +##杪 +##埴 +##獬 +##柢 +##捱 +##跣 +##涑 +##撃 +##伢 +##堠 +##卽 +##猁 +##厣 +##辏 +##旆 +##茆 +##乜 +##踯 +##。 +##? +##! +##? +##; +[UNK] diff --git a/modules/text/text_generation/ernie_gen/template/model/decode.py b/modules/text/text_generation/ernie_gen/template/model/decode.py new file mode 100644 index 00000000..d07a58b5 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/model/decode.py @@ -0,0 +1,259 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import numpy as np +from collections import namedtuple + +import paddle.fluid as F +import paddle.fluid.layers as L +import paddle.fluid.dygraph as D + + +def gen_bias(encoder_inputs, decoder_inputs, step): + decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] + attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) + decoder_bias = L.cast((L.matmul(attn_bias, 1. 
/ attn_bias, transpose_y=True) >= 1.), + 'float32') #[1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) #[bsz, 1, encoderlen] + encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached + cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. - is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty) + return log_probs / lp + + +def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty): + """logits.shape == [B*W, V]""" + beam_size, vocab_size = logits.shape # as batch size=1 in this hub module. 
the first dim means bsz * beam_size equals beam_size + logits_np = logits.numpy() + for i in range(beam_size): + logits_np[i][17963] = 0 # make [UNK] prob = 0 + logits = D.to_variable(logits_np) + + bsz, beam_width = state.log_probs.shape + onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] + + probs = L.log(L.softmax(logits)) #[B*W, V] + probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] + allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] + + not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] + not_eos = 1 - onehot_eos + length_to_add = not_finished * not_eos #[B*W,V] + alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add + + allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) + alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) + allscore = hyp_score(allprobs, alllen, length_penalty) + if is_first_step: + allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 + scores, idx = L.topk(allscore, k=beam_width) #[B, W] + next_beam_id = idx // vocab_size #[B, W] + next_word_id = idx % vocab_size + + gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1) + next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) + next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) + + gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1) + next_finished = L.reshape(L.gather_nd(state.finished, gather_idx), + state.finished.shape) #[gather new beam state according to new beam id] + + next_finished += L.cast(next_word_id == eos_id, 'int64') + next_finished = L.cast(next_finished > 0, 'int64') + + next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished) + output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) + + return output, next_state + + +@D.no_grad +def beam_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + beam_width=5, + tgt_type_id=3, + length_penalty=1.0): + model.eval() + _, __, info = model(q_ids, q_sids) + d_batch, d_seqlen = q_ids.shape + + state = BeamSearchState( + log_probs=L.zeros([d_batch, beam_width], 'float32'), + lengths=L.zeros([d_batch, beam_width], 'int64'), + finished=L.zeros([d_batch, beam_width], 'int64')) + outputs = [] + + def reorder_(t, parent_id): + """reorder cache according to parent beam id""" + gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1]) + t = L.gather(t, gather_idx) + return t + + def tile_(t, times): + _shapes = list(t.shape[1:]) + ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [ + 1, + times, + ] + [ + 1, + ] * len(_shapes)), [ + -1, + ] + _shapes) + return ret + + cached_k, cached_v = info['caches'] + cached_k = [tile_(k, beam_width) for k in cached_k] + cached_v = [tile_(v, beam_width) for v in cached_v] + past_cache = (cached_k, cached_v) + + q_ids = tile_(q_ids, beam_width) + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + + cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id + attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) + pos_ids += seqlen + + _, logits, info = model( + 
ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + + output, state = beam_search_step( + state, + logits[:, 1], + eos_id=eos_id, + beam_width=beam_width, + is_first_step=(step == 0), + length_penalty=length_penalty) + outputs.append(output) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [ + reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k) + ] # concat cached + cached_v = [ + reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v) + ] + past_cache = (cached_k, cached_v) + + pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width]) + ids = L.stack([pred_ids_flatten, attn_ids], 1) + + if state.finished.numpy().all(): + break + + final_ids = L.stack([o.predicted_ids for o in outputs], 0) + final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) + final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, + #0] #pick best beam + final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) + return final_ids + + +en_patten = re.compile(r'^[a-zA-Z0-9]*$') + + +def post_process(token): + if token.startswith('##'): + ret = token[2:] + else: + if en_patten.match(token): + ret = ' ' + token + else: + ret = token + return ret diff --git a/modules/text/text_generation/ernie_gen/template/model/file_utils.py b/modules/text/text_generation/ernie_gen/template/model/file_utils.py new file mode 100644 index 00000000..608be4ef --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/model/file_utils.py @@ -0,0 +1,46 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +from tqdm import tqdm +from paddlehub.common.logger import logger +from paddlehub.common.dir import MODULE_HOME + + +def _fetch_from_remote(url, force_download=False): + import tempfile, requests, tarfile + cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") + if force_download or not os.path.exists(cached_dir): + with tempfile.NamedTemporaryFile() as f: + #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' + r = requests.get(url, stream=True) + total_len = int(r.headers.get('content-length')) + for chunk in tqdm( + r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'): + if chunk: + f.write(chunk) + f.flush() + logger.debug('extacting... 
to %s' % f.name) + with tarfile.open(f.name) as tf: + tf.extractall(path=cached_dir) + logger.debug('%s cached in %s' % (url, cached_dir)) + return cached_dir + + +def add_docstring(doc): + def func(f): + f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) + return f + + return func diff --git a/modules/text/text_generation/ernie_gen/template/model/modeling_ernie.py b/modules/text/text_generation/ernie_gen/template/model/modeling_ernie.py new file mode 100644 index 00000000..d5de28a5 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/model/modeling_ernie.py @@ -0,0 +1,327 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +import paddle.fluid.dygraph as D +import paddle.fluid as F +import paddle.fluid.layers as L + +log = logging.getLogger(__name__) + + +def _build_linear(n_in, n_out, name, init, act=None): + return D.Linear( + n_in, + n_out, + param_attr=F.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init), + bias_attr='%s.b_0' % name if name is not None else None, + act=act) + + +def _build_ln(n_in, name): + return D.LayerNorm( + normalized_shape=n_in, + param_attr=F.ParamAttr( + name='%s_layer_norm_scale' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + bias_attr=F.ParamAttr( + name='%s_layer_norm_bias' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + ) + + +def append_name(name, postfix): + if name is None: + return None + elif name == '': + return postfix + else: + return '%s_%s' % (name, postfix) + + +class AttentionLayer(D.Layer): + def __init__(self, cfg, name=None): + super(AttentionLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + n_head = cfg['num_attention_heads'] + assert d_model % n_head == 0 + d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head + d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head + self.n_head = n_head + self.d_key = d_model_q // n_head + self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer) + self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer) + self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer) + self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=cfg['attention_probs_dropout_prob'], + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, queries, keys, values, attn_bias, past_cache): + assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3 + + q = self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = 
(k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = L.concat([cached_k, k], 1) + v = L.concat([cached_v, v], 1) + + q = L.transpose(L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + k = L.transpose(L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + v = L.transpose(L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + + q = L.scale(q, scale=self.d_key**-0.5) + score = L.matmul(q, k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = L.softmax(score, use_cudnn=True) + score = self.dropout(score) + + out = L.matmul(score, v) + out = L.transpose(out, [0, 2, 1, 3]) + out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) + + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(D.Layer): + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + assert cfg['hidden_act'] in ['relu', 'gelu'] + self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer, act=cfg['hidden_act']) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs): + hidden = self.i(inputs) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = D.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class ErnieModel(D.Layer): + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + D.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = D.Embedding([d_vocab, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'word_embedding'), initializer=initializer)) + self.pos_emb = D.Embedding([d_pos, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'pos_embedding'), initializer=initializer)) + self.sent_emb = D.Embedding([d_sent, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'sent_embedding'), initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), initializer, act='tanh') + else: + self.pooler = None + self.train() + + def eval(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + + def train(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
+ attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + """ + assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' + d_batch = L.shape(src_ids)[0] + d_seqlen = L.shape(src_ids)[1] + if pos_ids is None: + pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) + pos_ids = L.cast(pos_ids, 'int64') + if attn_bias is None: + if input_mask is None: + input_mask = L.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = L.unsqueeze(input_mask, axes=[-1]) + attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) + if use_causal_mask: + sequence = L.reshape(L.range(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) + causal_mask = L.cast((L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), 'float32') + attn_bias *= causal_mask + else: + assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + attn_bias = (1. - attn_bias) * -10000.0 + attn_bias = L.unsqueeze(attn_bias, [1]) + attn_bias = L.expand(attn_bias, [1, self.n_head, 1, 1]) # avoid broadcast =_= + attn_bias.stop_gradient = True + + if sent_ids is None: + sent_ids = L.zeros_like(src_ids) + + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + sent_embedded = self.sent_emb(sent_ids) + embedded = src_embedded + pos_embedded + sent_embedded + + embedded = self.dropout(self.ln(embedded)) + + encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache) + if self.pooler is not None: + pooled = self.pooler(encoded[:, 0, :]) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + else: + return pooled, encoded diff --git a/modules/text/text_generation/ernie_gen/template/model/modeling_ernie_gen.py b/modules/text/text_generation/ernie_gen/template/model/modeling_ernie_gen.py new file mode 100644 index 00000000..bc3d783d --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/model/modeling_ernie_gen.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
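+# ErnieModelForGeneration below wraps ErnieModel with a masked-LM head so that decode.py can
+# read next-token logits from it. A sketch of its forward() contract, as implemented further down:
+#   encode_only=True    -> (None, None, info)           # only fills the key/value caches
+#   tgt_labels is None  -> (output_ids, logits, info)   # vocab logits used by greedy/beam search
+#   tgt_labels given    -> (loss, logits_2d, info)      # softmax-with-cross-entropy training loss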
+ +import paddle.fluid as F +import paddle.fluid.layers as L + +from .modeling_ernie import ErnieModel +from .modeling_ernie import _build_linear, _build_ln, append_name + + +class ErnieModelForGeneration(ErnieModel): + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act']) + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = L.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=F.ParamAttr( + name=append_name(name, 'mask_lm_out_fc.b_0'), initializer=F.initializer.Constant(value=0.0)), + is_bias=True, + ) + + def forward(self, src_ids, *args, **kwargs): + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) + if encode_only: + return None, None, info + elif tgt_labels is None: + encoded = self.mlm(encoded) + encoded = self.mlm_ln(encoded) + logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = L.argmax(logits, -1) + return output_ids, logits, info + else: + encoded_2d = L.gather_nd(encoded, tgt_pos) + encoded_2d = self.mlm(encoded_2d) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias + if len(tgt_labels.shape) == 1: + tgt_labels = L.reshape(tgt_labels, [-1, 1]) + + loss = L.reduce_mean( + L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))) + return loss, logits_2d, info diff --git a/modules/text/text_generation/ernie_gen/template/model/tokenizing_ernie.py b/modules/text/text_generation/ernie_gen/template/model/tokenizing_ernie.py new file mode 100644 index 00000000..c9e5638f --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/model/tokenizing_ernie.py @@ -0,0 +1,163 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
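+# ErnieTokenizer below does greedy longest-match-first wordpiece tokenization over the vocab.txt
+# shipped with the module. Illustrative sketch only, assuming "hello" and "##world" are vocab
+# entries and no longer prefix of the word is:
+#   _wordpiece("helloworld", vocab, unk_token='[UNK]')
+#   -> (['hello', '##world'], [(0, 5), (5, 10)])
+# If no prefix of the remaining characters matches the vocab, the whole token falls back to a
+# single unknown token, i.e. ([unk_token], [(0, len(token))]).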
+ +import six +import re +import logging +from functools import partial + +import numpy as np + +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece( + match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + pair_id_type = 
np.ones_like(pair_id, dtype=np.int64) + ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) + return ret_id, ret_id_type + + def encode(self, text, pair=None, truncate_to=None): + text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) + text_id_type = np.zeros_like(text_id, dtype=np.int64) + if pair is not None: + pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) + else: + pair_id = [] + if truncate_to is not None: + text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to) + + ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) + return ret_id, ret_id_type diff --git a/modules/text/text_generation/ernie_gen/template/module.temp b/modules/text/text_generation/ernie_gen/template/module.temp new file mode 100644 index 00000000..4a739a7c --- /dev/null +++ b/modules/text/text_generation/ernie_gen/template/module.temp @@ -0,0 +1,177 @@ +# coding:utf-8 +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import json + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + +import argparse +import os +import numpy as np + +import paddle.fluid.dygraph as D + +from .model.tokenizing_ernie import ErnieTokenizer +from .model.decode import beam_search_infilling +from .model.modeling_ernie_gen import ErnieModelForGeneration + + +@moduleinfo( + name="{module_name}", + version="{version}", + summary= + "{summary}", + author="{author}", + author_email="{author_email}", + type="nlp/text_generation", +) +class ErnieGen(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + assets_path = os.path.join(self.directory, "assets") + gen_checkpoint_path = os.path.join(assets_path, "ernie_gen") + ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json') + with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file: + ernie_cfg = dict(json.loads(ernie_cfg_file.read())) + ernie_vocab_path = os.path.join(assets_path, 'vocab.txt') + with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file: + ernie_vocab = { + j.strip().split('\t')[0]: i + for i, j in enumerate(ernie_vocab_file.readlines()) + } + + with fluid.dygraph.guard(fluid.CPUPlace()): + with fluid.unique_name.guard(): + self.model = ErnieModelForGeneration(ernie_cfg) + finetuned_states, _ = D.load_dygraph(gen_checkpoint_path) + self.model.set_dict(finetuned_states) + + self.tokenizer = ErnieTokenizer(ernie_vocab) + self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} + self.rev_dict[self.tokenizer.pad_id] = '' # replace [PAD] + self.rev_dict[self.tokenizer.unk_id] = '' # replace [PAD] + self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) + + @serving + def generate(self, texts, use_gpu=False, 
beam_width=5): + """ + Get the predict result from the input texts. + + Args: + texts(list): the input texts. + use_gpu(bool): whether use gpu to predict or not + beam_width(int): the beam search width. + + Returns: + results(list): the predict result. + """ + if texts and isinstance(texts, list) and all(texts) and all( + [isinstance(text, str) for text in texts]): + predicted_data = texts + else: + raise ValueError( + "The input texts should be a list with nonempty string elements." + ) + + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + if use_gpu: + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + + with fluid.dygraph.guard(place): + self.model.eval() + results = [] + for text in predicted_data: + sample_results = [] + ids, sids = self.tokenizer.encode(text) + src_ids = D.to_variable(np.expand_dims(ids, 0)) + src_sids = D.to_variable(np.expand_dims(sids, 0)) + output_ids = beam_search_infilling( + self.model, + src_ids, + src_sids, + eos_id=self.tokenizer.sep_id, + sos_id=self.tokenizer.cls_id, + attn_id=self.tokenizer.vocab['[MASK]'], + max_decode_len={max_decode_len}, + max_encode_len={max_encode_len}, + beam_width=beam_width, + tgt_type_id=1) + output_str = self.rev_lookup(output_ids[0].numpy()) + + for ostr in output_str.tolist(): + if '[SEP]' in ostr: + ostr = ostr[:ostr.index('[SEP]')] + sample_results.append("".join(ostr)) + results.append(sample_results) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--beam_width', type=int, default=5, help="the beam search width") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + results = self.generate( + texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width) + + return results \ No newline at end of file diff --git a/modules/text/text_generation/ernie_gen/test_data/dev.txt b/modules/text/text_generation/ernie_gen/test_data/dev.txt new file mode 100644 index 00000000..685d461e --- /dev/null +++ b/modules/text/text_generation/ernie_gen/test_data/dev.txt @@ -0,0 +1,6 @@ +1 入林不动草,入水不动波。 镬汤无冷处,合眼跳黄河。 +2 画师端为谁,钓者亦安在。 我不识若人,相望大千内。 +3 何必关山远,凉风在殿西。 箫声犹袅袅,舞袖忽凄凄。 +4 不记门前路,门前一尺深。 梅花如有语,参透老逋心。 +5 石桥跨两岫,野叟尝远蹠。 旁有枰棋处,云是仙人奕。 +6 顾渚吴商绝,蒙山蜀信稀。 千丛因此始,含霞紫英肥。 diff --git a/modules/text/text_generation/ernie_gen/test_data/train.txt b/modules/text/text_generation/ernie_gen/test_data/train.txt new file mode 100644 index 00000000..1e9077e7 --- /dev/null +++ b/modules/text/text_generation/ernie_gen/test_data/train.txt @@ -0,0 +1,24 @@ +1 落叶频惊鹿,连峰欲映雕。 此生诗病苦,此病更萧条。 +2 汴水夹榆柳,今留胡马踪。 如何进贤路,只是见青松。 +3 夜行无月时,古路多荒榛。 山鬼摇把火,自照不照人。 +4 春到村居好,园林兴味长。 蚕贪桑眼出,蜂趁蜜脾忙。 +5 麀鹿同呦呦,山林风雨秋。 姑苏台上月,子胥曾约游。 +6 谢家庭下玉,化此青琅玕。 风标敻不俗,谁谓骨相寒。 +7 团团青枫阴,绰绰万间屋。 下有避俗翁,扫石叠两足。 +8 万壑摇苍烟,百滩度流水。 下有骑馿人,萧萧吹冻耳。 +9 太平蜀雀异,仍映碧桃间。 一秀三千岁,高枝永共攀。 +10 壁带非烟润,金铺霁景鲜。 绣功添采缕,和气入繁弦。 +11 苔寒两不借,对面宁尔劳。 欲语二三子,卑之毋甚高。 +12 肌细分红脉,香浓破紫苞。 无因留得翫,争忍折来抛。 +13 流水难穷目,斜阳易断肠。 谁同砑光帽,一曲舞山香。 +14 孙儿正啼哭,母言来与金。 捻他黄叶把,便是正声音。 +15 多病苦虚羸,晴明强展眉。 读书心绪少,闲卧日长时。 +16 去国投兹土,编茅隐旧踪。 年年秋水上,独对数株松。 +17 易觉春风老,偏知夏日长。 四山新笋出,一涧野花香。 +18 门拥千峰翠,溪无一点尘。 松风清入耳,山月白随人。 +19 款款穿芳径,双双度短墙。 不知身是幻,抵死恋花香。 +20 游目贝叶书,究竟华严境。 当年寓名心,观者要深省。 +21 声求不可求,见迹不寻牛。 迹在牛还在,不求何自休。 +22 学道如钻火,逢烟未可休。 直待金星现,曹门取郑州。 +23 修证彼何人,有国号众香。 此境了不殊,沉檀蔼飞扬。 +24 山中砖塔闭,松下影堂新。 恨不生前识,今朝礼画身。 diff --git a/modules/text/text_generation/plato2_en_base/utils/tokenization.py b/modules/text/text_generation/plato2_en_base/utils/tokenization.py index 8c5edcc0..c8596828 100644 --- a/modules/text/text_generation/plato2_en_base/utils/tokenization.py +++ b/modules/text/text_generation/plato2_en_base/utils/tokenization.py @@ -79,22 +79,9 @@ def encode_ids(spm_model, text, sample=False): def convert_to_unicode(text): """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" - if six.PY3: - if isinstance(text, str): - return text - elif isinstance(text, bytes): - return text.decode("utf-8", "ignore") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - elif six.PY2: - if isinstance(text, str): - return text.decode("utf-8", "ignore") - elif isinstance(text, unicode): - return text - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - else: - raise ValueError("Not running on Python2 or Python 3?") + if isinstance(text, six.binary_type): + return text.decode("utf-8", "ignore") + return text def load_vocab(vocab_file): diff --git a/modules/text/text_generation/plato2_en_large/utils/tokenization.py b/modules/text/text_generation/plato2_en_large/utils/tokenization.py index e07ab266..216ef86b 100644 --- a/modules/text/text_generation/plato2_en_large/utils/tokenization.py +++ b/modules/text/text_generation/plato2_en_large/utils/tokenization.py @@ -79,22 +79,9 @@ def encode_ids(spm_model, text, sample=False): def convert_to_unicode(text): """Converts `text` to Unicode (if it's not 
already), assuming utf-8 input.""" - if six.PY3: - if isinstance(text, str): - return text - elif isinstance(text, bytes): - return text.decode("utf-8", "ignore") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - elif six.PY2: - if isinstance(text, str): - return text.decode("utf-8", "ignore") - elif isinstance(text, unicode): - return text - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - else: - raise ValueError("Not running on Python2 or Python 3?") + if isinstance(text, six.binary_type): + return text.decode("utf-8", "ignore") + return text def load_vocab(vocab_file): diff --git a/modules/text/text_generation/reading_pictures_writing_poems/README.md b/modules/text/text_generation/reading_pictures_writing_poems/README.md new file mode 100644 index 00000000..ecc9ad2c --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems/README.md @@ -0,0 +1,42 @@ +reading_pictures_writing_poems +类别 文本 - 文本生成 + +# 模型概述 +看图写诗(reading_pictures_writing_poems),该模型可自动根据图像生成古诗词。该PaddleHub Module支持预测。 + +# 选择模型版本进行安装 +$ hub install reading_pictures_writing_poems==1.0.0 + +# 命令行预测示例 +$ hub run reading_pictures_writing_poems --input_image "scenery.jpg" + +![](https://ai-studio-static-online.cdn.bcebos.com/69a9d5a5472449678a08e1ee5066c81b5859827647d74eb8a674afabbc205ae5) +
AI根据这张图片生成的古诗是:
+- 蕾蕾海河海,岳峰岳麓蔓。 +- 不萌枝上春,自结心中线。 + +
+怎么样?还不错吧! +# Module API说明 +## WritingPoem(self, image, use_gpu=False) +看图写诗预测接口,预测输入一张图像,输出一首古诗词 +### 参数 +- image(str): 待检测的图片路径 +- use_gpu (bool): 是否使用 GPU +### 返回 +- results (list[dict]): 识别结果的列表,列表中每一个元素为 dict,关键字有 image,Poetrys, 其中: +image字段为原输入图片的路径 +Poetrys字段为输出的古诗词 + +# 代码示例 +import paddlehub as hub + +readingPicturesWritingPoems = hub.Module(directory="./reading_pictures_writing_poems") +readingPicturesWritingPoems.WritingPoem(image = "scenery.jpg", use_gpu=True) + +# 贡献者 +郑博培、彭兆帅 + +# 依赖 +paddlepaddle >= 1.8.2 +paddlehub >= 1.8.0 diff --git a/modules/text/text_generation/reading_pictures_writing_poems/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/text/text_generation/reading_pictures_writing_poems/module.py b/modules/text/text_generation/reading_pictures_writing_poems/module.py new file mode 100644 index 00000000..d03b02a5 --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems/module.py @@ -0,0 +1,137 @@ +import argparse +import ast +import os +import math +import six + +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import runnable, serving, moduleinfo +from paddlehub.io.parser import txt_parser +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from translate import Translator + + +@moduleinfo( + name="reading_pictures_writing_poems", + version="1.0.0", + summary="Just for test", + author="Mr.郑先生_", + author_email="2733821739@qq.com", + type="nlp/text_generation") +class ReadingPicturesWritingPoems(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model") + self.module_image = hub.Module(name="xception71_imagenet") # 调用图像分类的模型 + self.module_similar = hub.Module(name="ernie_gen_couplet") # 调用对联生成的模型 + self.module_poem = hub.Module(name="ernie_gen_poetry") # 调用古诗生成的模型 + + def is_chinese(self, string): + """ + 检查整个字符串是否为中文 + Args: + string (str): 需要检查的字符串,包含空格也是False + Return + bool + """ + if (len(string) <= 1): # 去除只有单个字或者为空的字符串 + return False + + for chart in string: # 把除了中文的所有字母、数字、符号去除 + if (chart < u'\u4e00' or chart > u'\u9fff'): + return False + + return True + + def WritingPoem(self, image, use_gpu=False): + input_dict = {"image": [image]} + results_image = self.module_image.classification(data=input_dict) + PictureClassification = list(results_image[0][0].keys())[0] + translator = Translator(to_lang="chinese") + PictureClassification_ch = translator.translate("{}".format(PictureClassification)) + texts = ["{}".format(PictureClassification_ch)] + results_keywords = self.module_similar.generate(texts=texts, use_gpu=use_gpu, beam_width=20) + Words = [] # 将符合标准的近义词保存在这里(标准:字符串为中文且长度大于1) + for item in range(20): + if (self.is_chinese(results_keywords[0][item])): + Words.append(results_keywords[0][item]) + # 古诗的一句可以拆分成许多词语,因此这里先找到能合成古诗的词语 + FirstWord = Words[0] + Words[1] + SecondWord = Words[2] + Words[3] + ThirdWord = Words[4] + Words[5] + FourthWord = Words[6] + Words[7] + # 出句和对句,也可以理解为上下句(专业讲法是出句和对句,古诗词是中国传统文化,出句和对句的英文翻译即拼音) + ChuJu = FirstWord + SecondWord # 出句 + DuiJu = ThirdWord + FourthWord # 对句 + FirstPoetry = ["{:.5},{:.5}。".format(ChuJu, DuiJu)] # 古诗词的上阕 + results = self.module_poem.generate(texts=FirstPoetry, use_gpu=use_gpu, beam_width=5) + SecondPoetry = ["{:.12}".format(results[0][0])] + Poetrys = [] + 
Poetrys.append(FirstPoetry) + Poetrys.append(SecondPoetry) + print("根据图片生成的古诗词:") + print("{}".format(Poetrys[0][0])) + print("{}".format(Poetrys[1][0])) + results = [{'image': image, 'Poetrys': "{}".format(Poetrys[0][0] + Poetrys[1][0])}] + + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except RuntimeError: + self.parser.print_help() + return None + + results = self.WritingPoem(input_data) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_image', type=str, default=None, help="Pictures to write poetry") + + def check_input_data(self, args): + input_data = [] + if args.input_image: + if not os.path.exists(args.input_image): + raise RuntimeError("File %s is not exist." % args.input_image) + else: + input_data = args.input_image + + if input_data == []: + raise RuntimeError("The input data is inconsistent with expectations.") + + return input_data diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py new file mode 100644 index 00000000..323dea76 --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py @@ -0,0 +1,123 @@ +from __future__ import absolute_import +from __future__ import division + +import os +import cv2 +import argparse +import base64 +import paddlex as pdx + +import numpy as np +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def cv2_to_base64(image): + # return base64.b64encode(image) + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def read_images(paths): + images = [] + for path in paths: + images.append(cv2.imread(path)) + return images + + +@moduleinfo( + name='MidAutumnDetection', + type='CV', + author='彭兆帅,郑博培', + author_email='1084667371@qq.com,2733821739@qq.com', + summary='', + version='1.0.0') +class MODULE(hub.Module): + def _initialize(self, **kwargs): + self.default_pretrained_model_path = os.path.join(self.directory, 'assets') + self.model = 
pdx.deploy.Predictor(self.default_pretrained_model_path, **kwargs) + + def predict(self, images=None, paths=None, data=None, batch_size=1, use_gpu=False, **kwargs): + + all_data = images if images is not None else read_images(paths) + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + res = [] + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except IndexError: + break + out = self.model.batch_predict(batch_data, **kwargs) + res.extend(out) + return res + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.predict(images_decode, **kwargs) + res = [] + for result in results: + if isinstance(result, dict): + # result_new = dict() + for key, value in result.items(): + if isinstance(value, np.ndarray): + result[key] = cv2_to_base64(value) + elif isinstance(value, np.generic): + result[key] = np.asscalar(value) + + elif isinstance(result, list): + for index in range(len(result)): + for key, value in result[index].items(): + if isinstance(value, np.ndarray): + result[index][key] = cv2_to_base64(value) + elif isinstance(value, np.generic): + result[index][key] = np.asscalar(value) + else: + raise RuntimeError('The result cannot be used in serving.') + res.append(result) + return res + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.predict(paths=[args.input_path], use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', type=bool, default=False, help="whether use GPU or not") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py new file mode 100644 index 00000000..d07a58b5 --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py @@ -0,0 +1,259 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import numpy as np +from collections import namedtuple + +import paddle.fluid as F +import paddle.fluid.layers as L +import paddle.fluid.dygraph as D + + +def gen_bias(encoder_inputs, decoder_inputs, step): + decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] + attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) + decoder_bias = L.cast((L.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.), + 'float32') #[1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) #[bsz, 1, encoderlen] + encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached + cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. 
- is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty) + return log_probs / lp + + +def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty): + """logits.shape == [B*W, V]""" + beam_size, vocab_size = logits.shape # as batch size=1 in this hub module. the first dim means bsz * beam_size equals beam_size + logits_np = logits.numpy() + for i in range(beam_size): + logits_np[i][17963] = 0 # make [UNK] prob = 0 + logits = D.to_variable(logits_np) + + bsz, beam_width = state.log_probs.shape + onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] + + probs = L.log(L.softmax(logits)) #[B*W, V] + probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] + allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] + + not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] + not_eos = 1 - onehot_eos + length_to_add = not_finished * not_eos #[B*W,V] + alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add + + allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) + alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) + allscore = hyp_score(allprobs, alllen, length_penalty) + if is_first_step: + allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 + scores, idx = L.topk(allscore, k=beam_width) #[B, W] + next_beam_id = idx // vocab_size #[B, W] + next_word_id = idx % vocab_size + + gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1) + next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) + next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) + + gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1) + next_finished = L.reshape(L.gather_nd(state.finished, gather_idx), + state.finished.shape) #[gather new beam state according to new beam id] + + next_finished += L.cast(next_word_id == eos_id, 'int64') + next_finished = L.cast(next_finished > 0, 'int64') + + next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished) + output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) + + return output, next_state + + +@D.no_grad +def beam_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + beam_width=5, + tgt_type_id=3, + length_penalty=1.0): + model.eval() + _, __, info = model(q_ids, q_sids) + d_batch, d_seqlen = q_ids.shape + + state = BeamSearchState( + log_probs=L.zeros([d_batch, beam_width], 'float32'), + lengths=L.zeros([d_batch, beam_width], 'int64'), + finished=L.zeros([d_batch, beam_width], 'int64')) + outputs = [] + + def reorder_(t, parent_id): + """reorder cache according to parent beam id""" + gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1]) + t = L.gather(t, gather_idx) + return t + + def tile_(t, times): + _shapes = list(t.shape[1:]) + ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [ + 1, + times, + ] + [ + 1, + ] * len(_shapes)), [ + -1, + ] + _shapes) + return ret + + cached_k, cached_v = info['caches'] + cached_k = [tile_(k, beam_width) for k in cached_k] + cached_v = [tile_(v, beam_width) for v in cached_v] + past_cache = (cached_k, cached_v) + + q_ids = tile_(q_ids, beam_width) + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + + cls_ids = L.ones([d_batch * 
beam_width], dtype='int64') * sos_id + attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) + pos_ids += seqlen + + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + + output, state = beam_search_step( + state, + logits[:, 1], + eos_id=eos_id, + beam_width=beam_width, + is_first_step=(step == 0), + length_penalty=length_penalty) + outputs.append(output) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [ + reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k) + ] # concat cached + cached_v = [ + reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v) + ] + past_cache = (cached_k, cached_v) + + pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width]) + ids = L.stack([pred_ids_flatten, attn_ids], 1) + + if state.finished.numpy().all(): + break + + final_ids = L.stack([o.predicted_ids for o in outputs], 0) + final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) + final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, + #0] #pick best beam + final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) + return final_ids + + +en_patten = re.compile(r'^[a-zA-Z0-9]*$') + + +def post_process(token): + if token.startswith('##'): + ret = token[2:] + else: + if en_patten.match(token): + ret = ' ' + token + else: + ret = token + return ret diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py new file mode 100644 index 00000000..608be4ef --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py @@ -0,0 +1,46 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +from tqdm import tqdm +from paddlehub.common.logger import logger +from paddlehub.common.dir import MODULE_HOME + + +def _fetch_from_remote(url, force_download=False): + import tempfile, requests, tarfile + cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") + if force_download or not os.path.exists(cached_dir): + with tempfile.NamedTemporaryFile() as f: + #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' + r = requests.get(url, stream=True) + total_len = int(r.headers.get('content-length')) + for chunk in tqdm( + r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'): + if chunk: + f.write(chunk) + f.flush() + logger.debug('extacting... 
to %s' % f.name) + with tarfile.open(f.name) as tf: + tf.extractall(path=cached_dir) + logger.debug('%s cached in %s' % (url, cached_dir)) + return cached_dir + + +def add_docstring(doc): + def func(f): + f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) + return f + + return func diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py new file mode 100644 index 00000000..d5de28a5 --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py @@ -0,0 +1,327 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +import paddle.fluid.dygraph as D +import paddle.fluid as F +import paddle.fluid.layers as L + +log = logging.getLogger(__name__) + + +def _build_linear(n_in, n_out, name, init, act=None): + return D.Linear( + n_in, + n_out, + param_attr=F.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init), + bias_attr='%s.b_0' % name if name is not None else None, + act=act) + + +def _build_ln(n_in, name): + return D.LayerNorm( + normalized_shape=n_in, + param_attr=F.ParamAttr( + name='%s_layer_norm_scale' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + bias_attr=F.ParamAttr( + name='%s_layer_norm_bias' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + ) + + +def append_name(name, postfix): + if name is None: + return None + elif name == '': + return postfix + else: + return '%s_%s' % (name, postfix) + + +class AttentionLayer(D.Layer): + def __init__(self, cfg, name=None): + super(AttentionLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + n_head = cfg['num_attention_heads'] + assert d_model % n_head == 0 + d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head + d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head + self.n_head = n_head + self.d_key = d_model_q // n_head + self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer) + self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer) + self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer) + self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=cfg['attention_probs_dropout_prob'], + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, queries, keys, values, attn_bias, past_cache): + assert 
len(queries.shape) == len(keys.shape) == len(values.shape) == 3 + + q = self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = L.concat([cached_k, k], 1) + v = L.concat([cached_v, v], 1) + + q = L.transpose(L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + k = L.transpose(L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + v = L.transpose(L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + + q = L.scale(q, scale=self.d_key**-0.5) + score = L.matmul(q, k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = L.softmax(score, use_cudnn=True) + score = self.dropout(score) + + out = L.matmul(score, v) + out = L.transpose(out, [0, 2, 1, 3]) + out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) + + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(D.Layer): + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + assert cfg['hidden_act'] in ['relu', 'gelu'] + self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer, act=cfg['hidden_act']) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs): + hidden = self.i(inputs) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = D.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class ErnieModel(D.Layer): + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + D.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = D.Embedding([d_vocab, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'word_embedding'), initializer=initializer)) + self.pos_emb = D.Embedding([d_pos, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'pos_embedding'), initializer=initializer)) + self.sent_emb = D.Embedding([d_sent, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'sent_embedding'), initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), initializer, act='tanh') + else: + self.pooler = None + self.train() + + def eval(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + + def train(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
+ attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + """ + assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' + d_batch = L.shape(src_ids)[0] + d_seqlen = L.shape(src_ids)[1] + if pos_ids is None: + pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) + pos_ids = L.cast(pos_ids, 'int64') + if attn_bias is None: + if input_mask is None: + input_mask = L.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = L.unsqueeze(input_mask, axes=[-1]) + attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) + if use_causal_mask: + sequence = L.reshape(L.range(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) + causal_mask = L.cast((L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), 'float32') + attn_bias *= causal_mask + else: + assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + attn_bias = (1. - attn_bias) * -10000.0 + attn_bias = L.unsqueeze(attn_bias, [1]) + attn_bias = L.expand(attn_bias, [1, self.n_head, 1, 1]) # avoid broadcast =_= + attn_bias.stop_gradient = True + + if sent_ids is None: + sent_ids = L.zeros_like(src_ids) + + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + sent_embedded = self.sent_emb(sent_ids) + embedded = src_embedded + pos_embedded + sent_embedded + + embedded = self.dropout(self.ln(embedded)) + + encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache) + if self.pooler is not None: + pooled = self.pooler(encoded[:, 0, :]) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + else: + return pooled, encoded diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py new file mode 100644 index 00000000..bc3d783d --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as F +import paddle.fluid.layers as L + +from .modeling_ernie import ErnieModel +from .modeling_ernie import _build_linear, _build_ln, append_name + + +class ErnieModelForGeneration(ErnieModel): + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act']) + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = L.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=F.ParamAttr( + name=append_name(name, 'mask_lm_out_fc.b_0'), initializer=F.initializer.Constant(value=0.0)), + is_bias=True, + ) + + def forward(self, src_ids, *args, **kwargs): + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) + if encode_only: + return None, None, info + elif tgt_labels is None: + encoded = self.mlm(encoded) + encoded = self.mlm_ln(encoded) + logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = L.argmax(logits, -1) + return output_ids, logits, info + else: + encoded_2d = L.gather_nd(encoded, tgt_pos) + encoded_2d = self.mlm(encoded_2d) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias + if len(tgt_labels.shape) == 1: + tgt_labels = L.reshape(tgt_labels, [-1, 1]) + + loss = L.reduce_mean( + L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))) + return loss, logits_2d, info diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py new file mode 100644 index 00000000..c9e5638f --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py @@ -0,0 +1,163 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
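+
+# A minimal usage sketch of the tokenizer defined below (the vocab dict is
+# assumed to be the one module.py loads from assets/vocab.txt):
+#
+#     tokenizer = ErnieTokenizer(vocab)
+#     ids, sids = tokenizer.encode('中秋')  # token ids and sentence-type ids
+#
+# encode() chains tokenize() -> convert_tokens_to_ids() -> build_for_ernie(),
+# which adds the [CLS]/[SEP] ids and returns the two int64 arrays that are fed
+# to ErnieModelForGeneration as src_ids and sent_ids.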
+ +import six +import re +import logging +from functools import partial + +import numpy as np + +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece( + match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + pair_id_type = 
np.ones_like(pair_id, dtype=np.int64) + ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) + return ret_id, ret_id_type + + def encode(self, text, pair=None, truncate_to=None): + text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) + text_id_type = np.zeros_like(text_id, dtype=np.int64) + if pair is not None: + pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) + else: + pair_id = [] + if truncate_to is not None: + text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to) + + ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) + return ret_id, ret_id_type diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py new file mode 100644 index 00000000..e532c545 --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py @@ -0,0 +1,162 @@ +# coding:utf-8 +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import json + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + +import argparse +import os +import numpy as np + +import paddle.fluid.dygraph as D + +from reading_pictures_writing_poems_for_midautumn.MidAutumnPoetry.model.tokenizing_ernie import ErnieTokenizer +from reading_pictures_writing_poems_for_midautumn.MidAutumnPoetry.model.decode import beam_search_infilling +from reading_pictures_writing_poems_for_midautumn.MidAutumnPoetry.model.modeling_ernie_gen import ErnieModelForGeneration + + +@moduleinfo( + name="MidAutumnPoetry", + version="1.0.0", + summary="", + author="郑博培,彭兆帅", + author_email="2733821739@qq.com,1084667371@qq.com", + type="nlp/text_generation", +) +class ErnieGen(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + assets_path = os.path.join(self.directory, "assets") + gen_checkpoint_path = os.path.join(assets_path, "ernie_gen") + ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json') + with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file: + ernie_cfg = dict(json.loads(ernie_cfg_file.read())) + ernie_vocab_path = os.path.join(assets_path, 'vocab.txt') + with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file: + ernie_vocab = {j.strip().split('\t')[0]: i for i, j in enumerate(ernie_vocab_file.readlines())} + + with fluid.dygraph.guard(fluid.CPUPlace()): + with fluid.unique_name.guard(): + self.model = ErnieModelForGeneration(ernie_cfg) + finetuned_states, _ = D.load_dygraph(gen_checkpoint_path) + self.model.set_dict(finetuned_states) + + self.tokenizer = ErnieTokenizer(ernie_vocab) + 
self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} + self.rev_dict[self.tokenizer.pad_id] = '' # replace [PAD] + self.rev_dict[self.tokenizer.unk_id] = '' # replace [PAD] + self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) + + @serving + def generate(self, texts, use_gpu=False, beam_width=5): + """ + Get the predict result from the input texts. + + Args: + texts(list): the input texts. + use_gpu(bool): whether use gpu to predict or not + beam_width(int): the beam search width. + + Returns: + results(list): the predict result. + """ + if texts and isinstance(texts, list) and all(texts) and all([isinstance(text, str) for text in texts]): + predicted_data = texts + else: + raise ValueError("The input texts should be a list with nonempty string elements.") + + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + if use_gpu: + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + + with fluid.dygraph.guard(place): + self.model.eval() + results = [] + for text in predicted_data: + sample_results = [] + ids, sids = self.tokenizer.encode(text) + src_ids = D.to_variable(np.expand_dims(ids, 0)) + src_sids = D.to_variable(np.expand_dims(sids, 0)) + output_ids = beam_search_infilling( + self.model, + src_ids, + src_sids, + eos_id=self.tokenizer.sep_id, + sos_id=self.tokenizer.cls_id, + attn_id=self.tokenizer.vocab['[MASK]'], + max_decode_len=50, + max_encode_len=50, + beam_width=beam_width, + tgt_type_id=1) + output_str = self.rev_lookup(output_ids[0].numpy()) + + for ostr in output_str.tolist(): + if '[SEP]' in ostr: + ostr = ostr[:ostr.index('[SEP]')] + sample_results.append("".join(ostr)) + results.append(sample_results) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + + self.arg_config_group.add_argument('--beam_width', type=int, default=5, help="the beam search width") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + results = self.generate(texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width) + + return results diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py new file mode 100644 index 00000000..84958fd4 --- /dev/null +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py @@ -0,0 +1,121 @@ +import argparse +import ast +import os +import math +import six + +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import runnable, serving, moduleinfo +from paddlehub.io.parser import txt_parser +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from translate import Translator +import reading_pictures_writing_poems_for_midautumn.MidAutumnDetection.module as MidAutumnDetection +import reading_pictures_writing_poems_for_midautumn.MidAutumnPoetry.module as MidAutumnPoetry + + +@moduleinfo( + name="reading_pictures_writing_poems_for_midautumn", + version="1.0.0", + summary="Reading Pictures And Writing Poems For MidAutumn", + author="郑博培,彭兆帅", + author_email="2733821739@qq.com,1084667371@qq.com", + type="nlp/text_generation") +class ReadingPicturesWritingPoems(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model") + self.module_image = MidAutumnDetection.MODULE( + directory="reading_pictures_writing_poems_for_midautumn/MidAutumnDetection") # 调用目标检测的模型 + self.module_similar = MidAutumnPoetry.ErnieGen( + directory='reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry') # 调用根据关键词生成古诗上阕的模型 + self.module_poem = hub.Module(name="ernie_gen_poetry") # 调用古诗生成的模型 + + def WritingPoem(self, images, use_gpu=False): + # 目标检测,输入图片,输入得分最高的标签 + results_image = self.module_image.predict(images=images) + best = {'score': 0, 'category': 'none'} + for item in results_image: + for items in item: + if (items['score'] > best['score']): + best['score'], best['category'] = items['score'], items['category'] + if best['category'] == 'MoonCake': + objects = ['月饼'] + elif best['category'] == 'moon': + objects = ['月亮'] + elif best['category'] == 'lantern': + objects = ['灯笼'] + elif best['category'] == 'rabbit': + objects = ['兔子'] + else: + objects = ['中秋节'] + # 根据关键词生成古诗上阕 + FirstPoetrys = self.module_similar.generate(texts=objects, use_gpu=True, beam_width=5) + FirstPoetry = [FirstPoetrys[0][0]] + # 调用古诗生成模型,使用上阕生成下阕 + SecondPoetry = self.module_poem.generate(texts=FirstPoetry, use_gpu=True, beam_width=5) + Poetrys = [] + Poetrys.append(FirstPoetry[0]) + Poetrys.append(SecondPoetry[0][0]) + results = [{'images': images, 'Poetrys': "{}".format(Poetrys[0] + Poetrys[1])}] + + return results + + @runnable + def 
run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except RuntimeError: + self.parser.print_help() + return None + + results = self.WritingPoem(input_data) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_image', type=str, default=None, help="Pictures to write poetry") + + def check_input_data(self, args): + input_data = [] + if args.input_image: + if not os.path.exists(args.input_image): + raise RuntimeError("File %s is not exist." % args.input_image) + else: + input_data = args.input_image + + if input_data == []: + raise RuntimeError("The input data is inconsistent with expectations.") + + return input_data -- GitLab
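
A minimal end-to-end sketch of how the new Mid-Autumn module could be called once this patch is installed (the directory matches the files added above; mooncake.jpg is a placeholder image path):

import cv2
import paddlehub as hub

midautumn = hub.Module(directory="./reading_pictures_writing_poems_for_midautumn")
results = midautumn.WritingPoem(images=[cv2.imread("mooncake.jpg")], use_gpu=False)
print(results[0]['Poetrys'])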