From 12267027796103c53e869936fa24290615abbc9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=83=E5=B9=B4=E6=9C=9F=E9=99=90?= <50902619+1084667371@users.noreply.github.com>
Date: Wed, 27 Jan 2021 16:19:57 +0800
Subject: [PATCH] =?UTF-8?q?=E8=AF=B7=E6=95=99=E6=9D=A1=E5=92=8C=E8=B0=A3?=
 =?UTF-8?q?=E8=A8=80=E6=A3=80=E6=B5=8B=20(#1211)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Weibo rumor prediction
* Leave-note generation
* Update module.py
* Update module.py
* Update module.py
* Update README.md
* Update README.md
---
 .../Rumor_prediction/README.md | 50 +++
 .../text_generation/Rumor_prediction/dict.txt | 1 +
 .../Rumor_prediction/module.py | 152 +++++++
 .../text_generation/ernie_gen_leave/README.md | 52 +++
 .../ernie_gen_leave/model/decode.py | 305 ++++++++++++++
 .../ernie_gen_leave/model/file_utils.py | 49 +++
 .../ernie_gen_leave/model/modeling_ernie.py | 379 ++++++++++++++++++
 .../model/modeling_ernie_gen.py | 78 ++++
 .../ernie_gen_leave/model/tokenizing_ernie.py | 171 ++++++++
 .../text_generation/ernie_gen_leave/module.py | 177 ++++++++
 .../text_generation/ernie_gen_leave/test.py | 8 +
 11 files changed, 1422 insertions(+)
 create mode 100644 modules/text/text_generation/Rumor_prediction/README.md
 create mode 100644 modules/text/text_generation/Rumor_prediction/dict.txt
 create mode 100644 modules/text/text_generation/Rumor_prediction/module.py
 create mode 100644 modules/text/text_generation/ernie_gen_leave/README.md
 create mode 100644 modules/text/text_generation/ernie_gen_leave/model/decode.py
 create mode 100644 modules/text/text_generation/ernie_gen_leave/model/file_utils.py
 create mode 100644 modules/text/text_generation/ernie_gen_leave/model/modeling_ernie.py
 create mode 100644 modules/text/text_generation/ernie_gen_leave/model/modeling_ernie_gen.py
 create mode 100644 modules/text/text_generation/ernie_gen_leave/model/tokenizing_ernie.py
 create mode 100644 modules/text/text_generation/ernie_gen_leave/module.py
 create mode 100644 modules/text/text_generation/ernie_gen_leave/test.py

diff --git a/modules/text/text_generation/Rumor_prediction/README.md b/modules/text/text_generation/Rumor_prediction/README.md
new file mode 100644
index 00000000..96ed782a
--- /dev/null
+++ b/modules/text/text_generation/Rumor_prediction/README.md
@@ -0,0 +1,50 @@
+## Overview
+
+
+Rumor_prediction is a model that predicts whether a given sentence is a rumor.
+
+## Command-Line Prediction
+
+```shell
+$ hub run Rumor_prediction --input_text='兴仁县今天抢小孩没抢走,把孩子母亲捅了一刀,看见这车的注意了,真事,车牌号辽HFM055!!!!!赶紧散播! 都别带孩子出去瞎转悠了 尤其别让老人自己带孩子出去 太危险了 注意了!!!!辽HFM055北京现代朗动,在各学校门口抢小孩!!!110已经 证实!!全市通缉!!'
+```
+
+## API
+
+```python
+def Rumor(texts, use_gpu=False):
+```
+
+Prediction API that determines whether the input sentences are rumors.
+
+**Parameters**
+
+* texts (list\[str\]): sentences to be classified as rumor or not;
+* use\_gpu (bool): whether to use the GPU; **if the GPU is used, please set the CUDA\_VISIBLE\_DEVICES environment variable first**;
+
+**Returns**
+
+* results (list\[dict\]): list of prediction results; each element is a dict with the following fields:
+
+  - content (str): the input text
+  - prediction (str): the predicted label
+  - probability (float): the probability of the predicted label
+
+**Code Example**
+
+```python
+import paddlehub as hub
+
+module = hub.Module(name="Rumor_prediction")
+
+test_texts = ['兴仁县今天抢小孩没抢走,把孩子母亲捅了一刀,看见这车的注意了,真事,车牌号辽HFM055!!!!!赶紧散播! 
都别带孩子出去瞎转悠了 尤其别让老人自己带孩子出去 太危险了 注意了!!!!辽HFM055北京现代朗动,在各学校门口抢小孩!!!110已经 证实!!全市通缉!!'] +results = module.Rumor(texts=test_texts, use_gpu=True) +print(results) +``` + + +### 依赖 + +paddlepaddle >= 2.0.0rc1 + +paddlehub >= 2.0.0rc0 diff --git a/modules/text/text_generation/Rumor_prediction/dict.txt b/modules/text/text_generation/Rumor_prediction/dict.txt new file mode 100644 index 00000000..b92f924f --- /dev/null +++ b/modules/text/text_generation/Rumor_prediction/dict.txt @@ -0,0 +1 @@ +{'寒': 0, '煲': 1, '升': 2, '耳': 3, '孜': 4, '矶': 5, '惑': 6, '谩': 7, '奉': 8, '坛': 9, '嘗': 10, '索': 11, '所': 12, '藏': 13, '阜': 14, '孟': 15, '久': 16, ')': 17, '散': 18, '真': 19, '肇': 20, '保': 21, '蜜': 22, '丁': 23, '玉': 24, '伏': 25, '次': 26, '隽': 27, '囯': 28, '浊': 29, '沥': 30, '豪': 31, '果': 32, '卢': 33, '夏': 34, '朦': 35, '墓': 36, '圖': 37, '躬': 38, '铃': 39, '浇': 40, '反': 41, '瑩': 42, '慕': 43, '練': 44, '抨': 45, '喃': 46, '滑': 47, '亇': 48, '紅': 49, '拙': 50, '侍': 51, '卤': 52, '摄': 53, '〗': 54, '谤': 55, '跟': 56, '⒑': 57, '备': 58, '躺': 59, '稳': 60, '九': 61, '歉': 62, '味': 63, '莎': 64, '黍': 65, '涎': 66, '想': 67, '鳍': 68, '籠': 69, '臨': 70, '纶': 71, '性': 72, '推': 73, '殉': 74, '平': 75, '倍': 76, '洽': 77, '浸': 78, '裔': 79, '鹤': 80, '破': 81, '軟': 82, '尚': 83, '肃': 84, '凱': 85, '呼': 86, '踊': 87, '编': 88, '輯': 89, '病': 90, '勤': 91, '婴': 92, '枯': 93, '邦': 94, '隨': 95, '級': 96, '〝': 97, '奸': 98, '愧': 99, '团': 100, '济': 101, '董': 102, '艺': 103, '赢': 104, '泄': 105, '蜂': 106, '东': 107, '荆': 108, '汶': 109, '痰': 110, '溅': 111, '湾': 112, '咚': 113, '異': 114, '省': 115, '互': 116, '亂': 117, '耙': 118, '棒': 119, '判': 120, '绘': 121, '呐': 122, '掷': 123, '匿': 124, '韵': 125, '低': 126, '演': 127, '做': 128, '榕': 129, '郡': 130, '明': 131, '吞': 132, '7': 133, '侣': 134, '曼': 135, '炭': 136, '淘': 137, '當': 138, '寨': 139, '餘': 140, '力': 141, '覽': 142, '坏': 143, '肩': 144, '宿': 145, '舟': 146, '嘉': 147, '妹': 148, '各': 149, '著': 150, '归': 151, '遗': 152, '表': 153, '勋': 154, '》': 155, '拦': 156, '瞬': 157, '運': 158, '挖': 159, '谊': 160, '乒': 161, '忽': 162, 'お': 163, '伞': 164, '粤': 165, '曾': 166, '糍': 167, '墨': 168, '设': 169, '滞': 170, '踩': 171, '沛': 172, '盗': 173, '尢': 174, '慌': 175, 'w': 176, '币': 177, 'O': 178, '份': 179, '晨': 180, '菌': 181, '药': 182, '颅': 183, '碍': 184, '桐': 185, '驱': 186, '险': 187, '焖': 188, '仕': 189, '牒': 190, '功': 191, '万': 192, '恼': 193, '囤': 194, '狐': 195, '诸': 196, '憨': 197, '戈': 198, '雀': 199, '筆': 200, '咆': 201, '郅': 202, '残': 203, '刷': 204, '茄': 205, '垄': 206, '眾': 207, '偿': 208, '求': 209, '0': 210, 'g': 211, '荩': 212, '帳': 213, '襲': 214, '庞': 215, '逅': 216, '杆': 217, '埃': 218, '俊': 219, '缺': 220, '爭': 221, '坨': 222, '秃': 223, '遐': 224, '是': 225, '玮': 226, '邀': 227, '监': 228, '呢': 229, '曦': 230, '紹': 231, '惋': 232, '揣': 233, '铺': 234, '篇': 235, '獨': 236, '哀': 237, '趣': 238, '咩': 239, '澳': 240, '坪': 241, '冰': 242, '婶': 243, '烟': 244, '像': 245, '👍': 246, '庸': 247, '舞': 248, '父': 249, '\ue415': 250, '貨': 251, '稠': 252, '锣': 253, '憶': 254, '鹅': 255, '苕': 256, '宋': 257, '机': 258, '.': 259, '危': 260, '鳝': 261, '御': 262, '隶': 263, '锥': 264, '失': 265, '第': 266, '座': 267, '★': 268, '宥': 269, '鞭': 270, '才': 271, '弃': 272, '憬': 273, '帝': 274, '\ue021': 275, '睡': 276, '凿': 277, '瀟': 278, '帥': 279, '渢': 280, '说': 281, '疚': 282, '墀': 283, '榨': 284, '哑': 285, '吼': 286, '意': 287, '드': 288, '–': 289, '耍': 290, '劝': 291, '話': 292, '親': 293, '桩': 294, "'": 295, '酚': 296, '干': 297, '国': 298, '歼': 299, '蕴': 300, '酿': 301, '叠': 302, '派': 303, '嬛': 304, '韩': 305, '宫': 306, '仁': 307, '臭': 308, '牌': 309, '說': 310, '棕': 311, '舍': 312, '伊': 313, '卿': 314, '抱': 315, '蔚': 
316, '遛': 317, '/': 318, '腰': 319, '違': 320, '纱': 321, '溯': 322, '\u2029': 323, '怯': 324, '哎': 325, '曝': 326, '终': 327, '丨': 328, '逺': 329, '哩': 330, '警': 331, '捷': 332, '宙': 333, '峻': 334, '原': 335, '觀': 336, '蓋': 337, '竹': 338, '戴': 339, '聽': 340, '桓': 341, '沫': 342, '忐': 343, '杰': 344, '执': 345, '利': 346, '帽': 347, '嗷': 348, '枳': 349, '沪': 350, '率': 351, '雾': 352, '嚣': 353, '啸': 354, '乎': 355, '饮': 356, '独': 357, '添': 358, '走': 359, '涉': 360, '怪': 361, '羔': 362, '巾': 363, '盼': 364, '繁': 365, '呦': 366, '舌': 367, '斐': 368, '使': 369, '坐': 370, '依': 371, '啊': 372, '电': 373, '幺': 374, '沿': 375, '內': 376, '汪': 377, '称': 378, '妈': 379, '宏': 380, '柜': 381, '盲': 382, '蹒': 383, '開': 384, '稼': 385, '诈': 386, '瞰': 387, 'ㅋ': 388, '∩': 389, '嫉': 390, '泮': 391, '起': 392, '资': 393, '仍': 394, '憎': 395, '美': 396, '。': 397, '傈': 398, '裴': 399, '棺': 400, '弱': 401, '匪': 402, '箱': 403, '相': 404, '更': 405, '没': 406, '聚': 407, '跨': 408, '訴': 409, '龙': 410, '施': 411, '厌': 412, '梓': 413, '莺': 414, '阶': 415, '棋': 416, '专': 417, '挤': 418, '禮': 419, 'る': 420, '\ue10c': 421, '巡': 422, '遥': 423, '日': 424, '岗': 425, '勝': 426, '殡': 427, '痴': 428, '措': 429, '狸': 430, '#': 431, '歷': 432, '趁': 433, '殆': 434, '只': 435, '鼓': 436, '亞': 437, ' ': 438, '流': 439, '悲': 440, '噬': 441, '裤': 442, '拐': 443, '😠': 444, '狂': 445, '山': 446, '镇': 447, '稍': 448, '染': 449, '-': 450, '瑾': 451, '账': 452, 'l': 453, '誌': 454, '赡': 455, '지': 456, 'キ': 457, '谅': 458, '聘': 459, '绎': 460, '词': 461, '血': 462, '墙': 463, '℃': 464, '嫖': 465, '尺': 466, '活': 467, '脍': 468, '担': 469, '男': 470, '掉': 471, '咒': 472, '吸': 473, '痞': 474, '根': 475, '晏': 476, '仨': 477, '急': 478, '怠': 479, '履': 480, '洼': 481, '唾': 482, '懷': 483, '妆': 484, '单': 485, '肾': 486, '奧': 487, '薪': 488, '皂': 489, '参': 490, '朔': 491, '甲': 492, '钉': 493, '雖': 494, '希': 495, '冬': 496, '摩': 497, '谎': 498, '铂': 499, '蹄': 500, '壮': 501, '纺': 502, '岛': 503, '伴': 504, '贱': 505, '柯': 506, '拒': 507, '鲑': 508, '童': 509, '怡': 510, '績': 511, 'で': 512, '邻': 513, '班': 514, '藉': 515, '锐': 516, '鄙': 517, '蛰': 518, '告': 519, '⒒': 520, '浙': 521, '近': 522, '屈': 523, '喝': 524, '呛': 525, '痛': 526, '甚': 527, '铜': 528, '巅': 529, '盾': 530, '爵': 531, '段': 532, '貓': 533, '紀': 534, '臂': 535, '載': 536, '扁': 537, '😜': 538, '焚': 539, '厕': 540, '︰': 541, '谭': 542, '粱': 543, '殒': 544, '睐': 545, '夫': 546, '淞': 547, '骚': 548, '凳': 549, '洪': 550, '碎': 551, 'C': 552, '全': 553, '以': 554, '霉': 555, '放': 556, '觅': 557, '磕': 558, '励': 559, '搜': 560, '膊': 561, '畫': 562, '熊': 563, '罐': 564, '闸': 565, '歆': 566, '虹': 567, '估': 568, '落': 569, '經': 570, '拼': 571, '挺': 572, '糙': 573, '鉴': 574, '豁': 575, '捆': 576, '比': 577, '濛': 578, '初': 579, '属': 580, '寫': 581, '候': 582, '參': 583, '碳': 584, '哟': 585, '姜': 586, '垢': 587, '券': 588, '慑': 589, '点': 590, '己': 591, '霞': 592, '纸': 593, '哥': 594, '赎': 595, '妞': 596, '勲': 597, '刁': 598, '胃': 599, '韭': 600, '註': 601, '詐': 602, '燮': 603, '群': 604, '庙': 605, '來': 606, '仗': 607, '9': 608, '探': 609, '蝶': 610, '傅': 611, '徽': 612, '缤': 613, '^': 614, '堡': 615, '赏': 616, '蛆': 617, '烩': 618, '準': 619, '朵': 620, '吃': 621, '嘴': 622, '典': 623, '端': 624, '連': 625, '趟': 626, '欲': 627, '『': 628, '馒': 629, '神': 630, '拯': 631, '芸': 632, '防': 633, '竣': 634, '时': 635, '輕': 636, '却': 637, '泳': 638, '陡': 639, '冒': 640, '💖': 641, '托': 642, '鹫': 643, '姊': 644, '嘲': 645, '枸': 646, '总': 647, '绿': 648, '症': 649, '练': 650, '耕': 651, '野': 652, '强': 653, '匆': 654, '🙏': 655, '吶': 656, 'o': 657, '包': 658, '幣': 659, '央': 660, '惮': 661, '險': 662, '爬': 663, '猪': 664, '邯': 665, '妖': 666, '挣': 667, '世': 668, '登': 669, '女': 670, 
'佐': 671, '笙': 672, '×': 673, '你': 674, '肆': 675, '池': 676, '鳄': 677, '蒂': 678, '腕': 679, '囡': 680, '娅': 681, '°': 682, '徇': 683, '沱': 684, '恢': 685, '“': 686, 'I': 687, '恭': 688, '缝': 689, '肮': 690, '就': 691, '眶': 692, '席': 693, '據': 694, '剂': 695, '哄': 696, '谈': 697, '岔': 698, '瞒': 699, '坦': 700, '忑': 701, '赈': 702, '雷': 703, '辰': 704, 'e': 705, '荥': 706, '闯': 707, '純': 708, '揽': 709, '林': 710, '巴': 711, '逞': 712, '串': 713, '璨': 714, '聊': 715, '偌': 716, '斑': 717, '暄': 718, '计': 719, '会': 720, '琪': 721, '⒊': 722, '吹': 723, '碟': 724, '胚': 725, '陣': 726, '饭': 727, '🔴': 728, '友': 729, '招': 730, '扯': 731, '武': 732, '錄': 733, '後': 734, '敖': 735, '审': 736, '鸟': 737, '筑': 738, '稽': 739, '吵': 740, '制': 741, '俄': 742, '逮': 743, '毙': 744, '摘': 745, '巫': 746, '姣': 747, '從': 748, '瑰': 749, '闻': 750, '队': 751, '汲': 752, '听': 753, '邓': 754, '逆': 755, '隔': 756, '袒': 757, '芮': 758, '肺': 759, '汗': 760, '权': 761, '注': 762, '华': 763, '技': 764, '肓': 765, '”': 766, '愚': 767, '奠': 768, '呃': 769, '壹': 770, '搽': 771, '榜': 772, '莫': 773, '邮': 774, '狱': 775, '镑': 776, '雁': 777, '殊': 778, '貌': 779, '两': 780, '璃': 781, '关': 782, '吻': 783, '悉': 784, '惊': 785, '靴': 786, '手': 787, '姨': 788, '朴': 789, '修': 790, '谄': 791, '必': 792, '熱': 793, '煞': 794, '煜': 795, '廉': 796, '炅': 797, '照': 798, '睿': 799, 'う': 800, '呀': 801, '甜': 802, '珞': 803, '攬': 804, '简': 805, '牧': 806, '漳': 807, '狼': 808, '契': 809, '焉': 810, '糨': 811, '賤': 812, '庄': 813, '於': 814, '\u3000': 815, '慨': 816, '吧': 817, '交': 818, '赴': 819, '薰': 820, '磋': 821, '囗': 822, '诺': 823, '龜': 824, '孀': 825, '绝': 826, '旧': 827, '擀': 828, '録': 829, '秉': 830, '淋': 831, '料': 832, '碗': 833, '七': 834, '降': 835, '乾': 836, '叨': 837, '確': 838, '韧': 839, '廳': 840, '胖': 841, '階': 842, '肿': 843, '断': 844, '汹': 845, '伪': 846, '且': 847, '烧': 848, '銀': 849, '蚌': 850, '翼': 851, '纳': 852, '斌': 853, '侃': 854, '规': 855, '款': 856, '路': 857, '拧': 858, '别': 859, '协': 860, '矮': 861, '悬': 862, '場': 863, '•': 864, '寺': 865, '昨': 866, '尘': 867, '藕': 868, '能': 869, '講': 870, '蛮': 871, '곤': 872, '澡': 873, '炫': 874, '写': 875, '够': 876, '胞': 877, '藩': 878, '赦': 879, '鈞': 880, '〖': 881, '迁': 882, '灿': 883, '桦': 884, '瞎': 885, '戲': 886, '迦': 887, '楷': 888, '玄': 889, '哮': 890, '古': 891, 'N': 892, '配': 893, '弄': 894, '太': 895, '都': 896, '盯': 897, '邹': 898, '隻': 899, '🎯': 900, '靠': 901, '谱': 902, '任': 903, '应': 904, '約': 905, '攸': 906, '恨': 907, '邵': 908, '尿': 909, '岖': 910, '煮': 911, '柄': 912, '珀': 913, '还': 914, '削': 915, '輸': 916, '诿': 917, '秩': 918, '\xa0': 919, '喽': 920, '吳': 921, '説': 922, 'E': 923, '勃': 924, '紫': 925, '补': 926, '痨': 927, '卷': 928, '巢': 929, '拢': 930, '對': 931, '浮': 932, '期': 933, '兰': 934, '勁': 935, '死': 936, '传': 937, '備': 938, '篡': 939, '瓤': 940, '醇': 941, '錢': 942, '強': 943, '狰': 944, '蛀': 945, '健': 946, '键': 947, '圳': 948, '丧': 949, '拳': 950, '沈': 951, '捉': 952, '浆': 953, '金': 954, '品': 955, '悚': 956, '佈': 957, '愫': 958, '株': 959, '陀': 960, '廣': 961, '斤': 962, '烛': 963, '连': 964, '癌': 965, '晤': 966, '诛': 967, '倫': 968, '→': 969, '梧': 970, '瀬': 971, '蜗': 972, '刨': 973, '叮': 974, '戰': 975, '界': 976, '婷': 977, '拷': 978, '飙': 979, '绷': 980, '开': 981, '還': 982, '蚝': 983, '暗': 984, '焦': 985, '右': 986, '<': 987, '脑': 988, '攀': 989, '蹋': 990, '源': 991, '热': 992, '引': 993, '圓': 994, '咂': 995, '乌': 996, '塚': 997, '银': 998, '館': 999, '范': 1000, '乍': 1001, '均': 1002, '圣': 1003, '舱': 1004, '凑': 1005, '青': 1006, '寂': 1007, '馅': 1008, '惫': 1009, '😂': 1010, '曰': 1011, '戮': 1012, '砸': 1013, '逐': 1014, '⚠': 1015, '奚': 1016, '榄': 1017, '屉': 1018, '炮': 1019, '統': 1020, '樟': 1021, '谙': 1022, 
'肉': 1023, '蝴': 1024, '4': 1025, '栽': 1026, '葡': 1027, '诞': 1028, '嚏': 1029, '无': 1030, '沢': 1031, '夸': 1032, '娆': 1033, '限': 1034, '跷': 1035, '样': 1036, '势': 1037, '虫': 1038, '频': 1039, '裙': 1040, '糗': 1041, '涵': 1042, '禽': 1043, '終': 1044, '搏': 1045, '勇': 1046, '秦': 1047, 'θ': 1048, '#': 1049, '&': 1050, '抠': 1051, '磅': 1052, '垃': 1053, '耀': 1054, '律': 1055, '适': 1056, '究': 1057, '杂': 1058, '堵': 1059, '迷': 1060, '钻': 1061, '缆': 1062, '职': 1063, '共': 1064, '濃': 1065, '滋': 1066, '張': 1067, '剔': 1068, '层': 1069, '媽': 1070, '恕': 1071, '细': 1072, '體': 1073, '麒': 1074, '刊': 1075, '俏': 1076, '傻': 1077, '莱': 1078, '策': 1079, '浓': 1080, '离': 1081, '鸭': 1082, 'c': 1083, '釜': 1084, '蛩': 1085, '本': 1086, '龄': 1087, '忌': 1088, '载': 1089, '訪': 1090, '泥': 1091, '朽': 1092, '叶': 1093, '字': 1094, '盐': 1095, '争': 1096, '尹': 1097, '扣': 1098, '场': 1099, '螺': 1100, '文': 1101, '挨': 1102, '炎': 1103, '竿': 1104, '恃': 1105, '贡': 1106, '堰': 1107, '栖': 1108, '捏': 1109, '≪': 1110, '腊': 1111, '杖': 1112, '肚': 1113, '幾': 1114, '<': 1115, '饥': 1116, '醒': 1117, '掼': 1118, '束': 1119, '再': 1120, '叫': 1121, '湯': 1122, '扇': 1123, '緯': 1124, '亊': 1125, '撤': 1126, '5': 1127, '室': 1128, '離': 1129, '严': 1130, '压': 1131, '霖': 1132, '魅': 1133, '改': 1134, '樽': 1135, '腥': 1136, '歲': 1137, '谜': 1138, '優': 1139, '矩': 1140, '顏': 1141, '喔': 1142, '旁': 1143, '聂': 1144, '缓': 1145, '勾': 1146, '寄': 1147, '棠': 1148, '纹': 1149, '轿': 1150, '触': 1151, '先': 1152, '投': 1153, '⒍': 1154, '傑': 1155, '鹰': 1156, '趴': 1157, '霜': 1158, '酬': 1159, '⒔': 1160, '拎': 1161, '澜': 1162, '盎': 1163, '蚁': 1164, '南': 1165, '焱': 1166, '飏': 1167, '讯': 1168, '胡': 1169, '谦': 1170, '篪': 1171, '按': 1172, '恵': 1173, '辽': 1174, '寓': 1175, '祷': 1176, '峯': 1177, '档': 1178, '尸': 1179, '‘': 1180, '牛': 1181, '遨': 1182, '匣': 1183, '拭': 1184, '赶': 1185, '润': 1186, '捧': 1187, '薦': 1188, '桢': 1189, '踮': 1190, '祈': 1191, '洞': 1192, '疱': 1193, '杞': 1194, '侬': 1195, '则': 1196, '圭': 1197, '痔': 1198, '认': 1199, '泡': 1200, '宪': 1201, '抉': 1202, '衙': 1203, '欧': 1204, '擁': 1205, '哈': 1206, '砣': 1207, '膳': 1208, '科': 1209, '睬': 1210, '買': 1211, '藥': 1212, '缠': 1213, '永': 1214, '啲': 1215, '我': 1216, '捞': 1217, '杏': 1218, '敬': 1219, '持': 1220, '牺': 1221, '陂': 1222, '辛': 1223, '慧': 1224, '傳': 1225, '汽': 1226, '雉': 1227, '饪': 1228, '打': 1229, '分': 1230, '姑': 1231, '竟': 1232, '娜': 1233, '筋': 1234, '殴': 1235, '乳': 1236, '朋': 1237, '负': 1238, '靓': 1239, '潮': 1240, '织': 1241, '洋': 1242, '揉': 1243, '象': 1244, '齊': 1245, '顺': 1246, '漉': 1247, '⒉': 1248, '挡': 1249, '冧': 1250, '咔': 1251, '角': 1252, '网': 1253, '遍': 1254, '尤': 1255, '茉': 1256, '搀': 1257, '\u200a': 1258, '豚': 1259, '绑': 1260, '绵': 1261, '實': 1262, '骇': 1263, '滩': 1264, '彼': 1265, '桔': 1266, '槟': 1267, '哆': 1268, '头': 1269, '旭': 1270, '芳': 1271, '喉': 1272, '又': 1273, '脏': 1274, '几': 1275, '羽': 1276, '鑫': 1277, '沧': 1278, '「': 1279, '净': 1280, '驰': 1281, '帘': 1282, '企': 1283, '绯': 1284, '啪': 1285, '献': 1286, '掌': 1287, '赫': 1288, '癫': 1289, '诉': 1290, '承': 1291, '列': 1292, '緣': 1293, '复': 1294, '天': 1295, '丈': 1296, '元': 1297, '货': 1298, '辱': 1299, '糕': 1300, '咽': 1301, '厥': 1302, '地': 1303, '伶': 1304, '谨': 1305, '魄': 1306, '識': 1307, '孕': 1308, '負': 1309, '存': 1310, '⑥': 1311, '宁': 1312, '闺': 1313, '个': 1314, '虏': 1315, '暖': 1316, '冤': 1317, '母': 1318, '组': 1319, '燃': 1320, '憋': 1321, '厨': 1322, '咸': 1323, '贿': 1324, '捶': 1325, '租': 1326, '毒': 1327, '炳': 1328, '熔': 1329, '澄': 1330, '抑': 1331, '領': 1332, '惭': 1333, '满': 1334, '菇': 1335, '另': 1336, '旋': 1337, '柏': 1338, '些': 1339, '质': 1340, '撇': 1341, '恰': 1342, '臣': 1343, '丛': 1344, '沇': 
1345, '远': 1346, '烂': 1347, '债': 1348, '批': 1349, '菊': 1350, '夜': 1351, '锻': 1352, '嚓': 1353, '傍': 1354, '邡': 1355, '晓': 1356, '岸': 1357, '爱': 1358, '毕': 1359, '漓': 1360, '锡': 1361, '⒕': 1362, '访': 1363, '豆': 1364, '沾': 1365, '牢': 1366, '惠': 1367, '豹': 1368, '念': 1369, '唤': 1370, '扭': 1371, '網': 1372, '爷': 1373, '錯': 1374, '旅': 1375, '休': 1376, '桶': 1377, '疼': 1378, '📢': 1379, '铁': 1380, '叙': 1381, '楼': 1382, '辟': 1383, '搞': 1384, 'て': 1385, '台': 1386, '炽': 1387, '侯': 1388, '霓': 1389, '粹': 1390, '卦': 1391, '煎': 1392, '枪': 1393, '高': 1394, '叟': 1395, '巧': 1396, '桥': 1397, '跪': 1398, '萝': 1399, '唇': 1400, '苑': 1401, '旗': 1402, '渊': 1403, '葩': 1404, '晾': 1405, '伦': 1406, '受': 1407, '椒': 1408, '姚': 1409, '梗': 1410, '尬': 1411, '局': 1412, '庝': 1413, '兲': 1414, '竞': 1415, '被': 1416, '雞': 1417, '覺': 1418, '攪': 1419, '惘': 1420, '丘': 1421, '闷': 1422, '擦': 1423, '沟': 1424, '皮': 1425, '炼': 1426, '礦': 1427, '叹': 1428, '检': 1429, '陈': 1430, '胎': 1431, '👏': 1432, '甘': 1433, '颍': 1434, '萬': 1435, '部': 1436, '楚': 1437, '隋': 1438, '燈': 1439, '客': 1440, '⒓': 1441, '襟': 1442, '悠': 1443, '葫': 1444, '着': 1445, '徹': 1446, '撅': 1447, '弘': 1448, '琅': 1449, '怨': 1450, '+': 1451, '披': 1452, '筠': 1453, '习': 1454, '停': 1455, '翻': 1456, '寿': 1457, '寝': 1458, '维': 1459, '漏': 1460, '程': 1461, '向': 1462, '=': 1463, '拘': 1464, '乙': 1465, '將': 1466, '姥': 1467, '柳': 1468, '冯': 1469, '搖': 1470, '吠': 1471, '上': 1472, '蹈': 1473, 'M': 1474, '倔': 1475, '痤': 1476, '腺': 1477, '须': 1478, '秤': 1479, '姿': 1480, '逛': 1481, 'S': 1482, '窈': 1483, '彰': 1484, '黎': 1485, '帷': 1486, '+': 1487, '县': 1488, '釧': 1489, '觊': 1490, '扒': 1491, '幼': 1492, '崖': 1493, '多': 1494, '峡': 1495, '动': 1496, '溃': 1497, '翠': 1498, '液': 1499, '抗': 1500, '拋': 1501, '管': 1502, 'K': 1503, '睛': 1504, '案': 1505, '宅': 1506, '鲲': 1507, '扬': 1508, '折': 1509, '珍': 1510, '幫': 1511, '届': 1512, '節': 1513, '嚷': 1514, '問': 1515, '虞': 1516, '校': 1517, '造': 1518, '憧': 1519, '退': 1520, '祎': 1521, '溜': 1522, '役': 1523, '逼': 1524, '➊': 1525, '語': 1526, '超': 1527, '辜': 1528, '4': 1529, '奋': 1530, '虚': 1531, '卑': 1532, '袁': 1533, '\ue00e': 1534, '嘅': 1535, '骸': 1536, 'サ': 1537, '僳': 1538, '芦': 1539, '股': 1540, '舰': 1541, '奕': 1542, '撞': 1543, '癢': 1544, '膨': 1545, '攫': 1546, '伤': 1547, '枭': 1548, '诅': 1549, '哨': 1550, '荡': 1551, '膛': 1552, '爸': 1553, '沉': 1554, '悟': 1555, '蹦': 1556, '陳': 1557, '弯': 1558, '梨': 1559, '脉': 1560, '烈': 1561, '蘇': 1562, '肘': 1563, '确': 1564, '漆': 1565, '8': 1566, '钊': 1567, '获': 1568, '噱': 1569, '刺': 1570, '丽': 1571, '扩': 1572, '领': 1573, '潇': 1574, '即': 1575, '把': 1576, '撕': 1577, ',': 1578, '吟': 1579, '饨': 1580, '隘': 1581, 'i': 1582, '夠': 1583, '郝': 1584, '者': 1585, '渠': 1586, '淄': 1587, '嵌': 1588, '幻': 1589, '鸣': 1590, '兑': 1591, 'ャ': 1592, '脊': 1593, '和': 1594, '柒': 1595, '簿': 1596, '匀': 1597, '缩': 1598, '井': 1599, '隆': 1600, '龍': 1601, '寸': 1602, '浴': 1603, '将': 1604, '徙': 1605, '塔': 1606, '定': 1607, '營': 1608, '⒖': 1609, '評': 1610, '或': 1611, '鸡': 1612, '轉': 1613, '崩': 1614, '矢': 1615, '甄': 1616, '晒': 1617, '喵': 1618, '窦': 1619, '⒌': 1620, '環': 1621, '姗': 1622, '❤': 1623, '齿': 1624, '阱': 1625, '北': 1626, '抵': 1627, '眈': 1628, '舅': 1629, '伙': 1630, '陷': 1631, '剥': 1632, '淀': 1633, '恍': 1634, '蔥': 1635, '宛': 1636, '卻': 1637, '览': 1638, '應': 1639, '動': 1640, '顿': 1641, '义': 1642, '炜': 1643, '奖': 1644, '琍': 1645, '啬': 1646, '匡': 1647, '狄': 1648, '欢': 1649, '阖': 1650, '方': 1651, '↓': 1652, '劑': 1653, '占': 1654, '贬': 1655, '观': 1656, '弧': 1657, '口': 1658, '蘋': 1659, '封': 1660, '拽': 1661, '哇': 1662, '船': 1663, '畜': 1664, '洗': 1665, '嘟': 1666, '忡': 1667, 
'佑': 1668, '贞': 1669, '俩': 1670, '它': 1671, '埋': 1672, '/': 1673, '殺': 1674, '窘': 1675, '兹': 1676, '纬': 1677, '桑': 1678, '迭': 1679, '卖': 1680, '➋': 1681, '躲': 1682, '驻': 1683, '阀': 1684, '穎': 1685, '嗨': 1686, '簸': 1687, '腔': 1688, '🔲': 1689, '努': 1690, '剁': 1691, '擅': 1692, '欺': 1693, '⒐': 1694, '唔': 1695, '们': 1696, '逝': 1697, '斓': 1698, '积': 1699, '烨': 1700, 'R': 1701, '陸': 1702, '悔': 1703, '非': 1704, '耗': 1705, '园': 1706, '嘎': 1707, '蝎': 1708, '咙': 1709, '侨': 1710, '痘': 1711, '曹': 1712, '侥': 1713, '接': 1714, '咖': 1715, '9': 1716, '住': 1717, '玛': 1718, '鞠': 1719, '脾': 1720, '撼': 1721, '火': 1722, '剩': 1723, '牙': 1724, '酋': 1725, '韶': 1726, '目': 1727, '论': 1728, '环': 1729, '6': 1730, '祛': 1731, '喊': 1732, '娘': 1733, '抄': 1734, '构': 1735, '嗲': 1736, '缮': 1737, '贤': 1738, '遣': 1739, '竺': 1740, '缙': 1741, '雅': 1742, '摇': 1743, '间': 1744, '刀': 1745, '拍': 1746, '(': 1747, '庐': 1748, '胺': 1749, '携': 1750, '价': 1751, '合': 1752, '益': 1753, '溝': 1754, '電': 1755, '佢': 1756, '黑': 1757, '骗': 1758, '亿': 1759, '阉': 1760, '坼': 1761, '趋': 1762, '蕉': 1763, '侠': 1764, '昌': 1765, '素': 1766, '飯': 1767, '僧': 1768, '逻': 1769, '赌': 1770, '尊': 1771, '紋': 1772, '彬': 1773, '庆': 1774, '找': 1775, '讲': 1776, '…': 1777, '雇': 1778, '纪': 1779, 'J': 1780, '」': 1781, '杯': 1782, '獎': 1783, '吕': 1784, '皓': 1785, '沁': 1786, '椽': 1787, '出': 1788, '邱': 1789, '咗': 1790, '?': 1791, '充': 1792, '阳': 1793, '\ue141': 1794, '扶': 1795, '亢': 1796, '逃': 1797, '河': 1798, '治': 1799, '愿': 1800, '际': 1801, '图': 1802, '拔': 1803, '祸': 1804, '墟': 1805, '横': 1806, '啦': 1807, '炒': 1808, '首': 1809, '證': 1810, '丢': 1811, '芜': 1812, '少': 1813, '敞': 1814, '诫': 1815, '陆': 1816, '`': 1817, '旬': 1818, '刑': 1819, '行': 1820, '.': 1821, 'é': 1822, '删': 1823, '犬': 1824, '邪': 1825, '亨': 1826, '*': 1827, '巳': 1828, '虑': 1829, '灵': 1830, '箭': 1831, '倡': 1832, '隧': 1833, '懒': 1834, '疡': 1835, '已': 1836, '摔': 1837, '谋': 1838, '讼': 1839, '衡': 1840, '妥': 1841, '鞋': 1842, '区': 1843, '仲': 1844, '盘': 1845, '腚': 1846, '沒': 1847, '拌': 1848, '蒸': 1849, '侵': 1850, '迹': 1851, '守': 1852, '湿': 1853, '達': 1854, '骏': 1855, '萧': 1856, '硝': 1857, '麻': 1858, '颗': 1859, '柔': 1860, '昧': 1861, '堪': 1862, '晟': 1863, '衔': 1864, '杠': 1865, '啖': 1866, '戟': 1867, '睹': 1868, '异': 1869, 'h': 1870, '┭': 1871, '迢': 1872, '蕾': 1873, '怜': 1874, '缴': 1875, '印': 1876, '醫': 1877, '袍': 1878, '妊': 1879, '录': 1880, '嘈': 1881, '蕭': 1882, '闹': 1883, '支': 1884, '唐': 1885, '星': 1886, '订': 1887, '烦': 1888, '齒': 1889, '甫': 1890, '既': 1891, '疮': 1892, '绪': 1893, '皇': 1894, '莲': 1895, '志': 1896, '涡': 1897, '偎': 1898, '胁': 1899, '疹': 1900, '勺': 1901, '因': 1902, '杜': 1903, '宠': 1904, '渎': 1905, '贯': 1906, '瓦': 1907, '衅': 1908, '叩': 1909, '瘀': 1910, '直': 1911, '肥': 1912, '许': 1913, '京': 1914, '敲': 1915, '褶': 1916, '沸': 1917, '毁': 1918, '讨': 1919, '屿': 1920, '值': 1921, '蹭': 1922, '芩': 1923, '街': 1924, '馨': 1925, '髦': 1926, '湧': 1927, '粵': 1928, '玻': 1929, '朱': 1930, '凌': 1931, '汕': 1932, '絕': 1933, '謝': 1934, '完': 1935, '函': 1936, '龚': 1937, '飽': 1938, '檐': 1939, '猫': 1940, '坍': 1941, '微': 1942, '跌': 1943, '奏': 1944, '仙': 1945, '站': 1946, '彪': 1947, '尔': 1948, '迈': 1949, '节': 1950, '尽': 1951, '诠': 1952, '乏': 1953, '犯': 1954, '研': 1955, '宰': 1956, '厮': 1957, '項': 1958, '搬': 1959, '忘': 1960, '当': 1961, '怀': 1962, '冲': 1963, '侄': 1964, '骤': 1965, '況': 1966, '會': 1967, '卸': 1968, '泾': 1969, '毯': 1970, '剑': 1971, '见': 1972, '蔗': 1973, '輩': 1974, '季': 1975, '珊': 1976, '嚕': 1977, '稣': 1978, '建': 1979, '误': 1980, '询': 1981, '茂': 1982, '獠': 1983, '潘': 1984, '舆': 1985, '嫁': 1986, '砂': 1987, '係': 1988, '仅': 1989, '茫': 
1990, '酥': 1991, '茎': 1992, '汾': 1993, '﹣': 1994, '凶': 1995, '居': 1996, '喂': 1997, '搅': 1998, '璋': 1999, '羁': 2000, '挥': 2001, '回': 2002, '囊': 2003, '赞': 2004, '揪': 2005, '浦': 2006, '椰': 2007, '衷': 2008, ':': 2009, '汤': 2010, '編': 2011, '裏': 2012, '续': 2013, '广': 2014, '靡': 2015, '困': 2016, '選': 2017, '今': 2018, '垫': 2019, '崴': 2020, '车': 2021, '择': 2022, '饼': 2023, '炬': 2024, '傲': 2025, '組': 2026, '若': 2027, '敌': 2028, '疽': 2029, '骄': 2030, '誓': 2031, '温': 2032, '攝': 2033, '忻': 2034, '千': 2035, '綠': 2036, '辑': 2037, '佯': 2038, '傾': 2039, '桃': 2040, '抿': 2041, '踏': 2042, '豫': 2043, '态': 2044, '❌': 2045, '抹': 2046, '懈': 2047, '员': 2048, '对': 2049, '圾': 2050, '潭': 2051, '孔': 2052, '看': 2053, '鬼': 2054, '假': 2055, '呱': 2056, '號': 2057, '鍾': 2058, 'も': 2059, '疗': 2060, '谷': 2061, '彗': 2062, '丝': 2063, '之': 2064, '阪': 2065, '帮': 2066, '侧': 2067, '付': 2068, '祀': 2069, '苯': 2070, '诚': 2071, '歪': 2072, '举': 2073, '加': 2074, '婺': 2075, '窃': 2076, '👽': 2077, '容': 2078, '切': 2079, '锦': 2080, '唉': 2081, '弊': 2082, '及': 2083, '寻': 2084, '式': 2085, '页': 2086, '随': 2087, '钟': 2088, '炙': 2089, '颐': 2090, '瘦': 2091, '肤': 2092, '2': 2093, '絮': 2094, '畔': 2095, '娟': 2096, '⑤': 2097, '晰': 2098, '馆': 2099, '疏': 2100, '砧': 2101, '挂': 2102, '視': 2103, '浔': 2104, '丫': 2105, '1': 2106, '纷': 2107, '掏': 2108, '释': 2109, '惟': 2110, '家': 2111, '芥': 2112, '侮': 2113, '挝': 2114, '狠': 2115, '畸': 2116, 'A': 2117, '殃': 2118, '鲁': 2119, '琴': 2120, '枉': 2121, '佳': 2122, '菲': 2123, 'ン': 2124, '甩': 2125, '唱': 2126, '糟': 2127, '徨': 2128, '进': 2129, '忆': 2130, '蚂': 2131, '氣': 2132, '諾': 2133, '敦': 2134, '叭': 2135, '梳': 2136, '庇': 2137, '球': 2138, '饺': 2139, 'V': 2140, '增': 2141, '《': 2142, '亏': 2143, '匹': 2144, '楠': 2145, '畅': 2146, '暮': 2147, '物': 2148, '屠': 2149, '税': 2150, '魏': 2151, '碰': 2152, '[': 2153, '鲜': 2154, '蟹': 2155, '縛': 2156, '基': 2157, '蔡': 2158, '爽': 2159, '導': 2160, '级': 2161, '赛': 2162, '项': 2163, '寞': 2164, '湘': 2165, '渴': 2166, '么': 2167, '稚': 2168, '冷': 2169, '轩': 2170, '\ue419': 2171, '教': 2172, '爪': 2173, '淆': 2174, '轻': 2175, '靈': 2176, '融': 2177, '衩': 2178, '結': 2179, '喱': 2180, '曉': 2181, '贴': 2182, '云': 2183, '尝': 2184, '紧': 2185, '慘': 2186, '线': 2187, '笋': 2188, '暴': 2189, '數': 2190, '不': 2191, '拖': 2192, '滤': 2193, '秀': 2194, '蜀': 2195, '愤': 2196, '易': 2197, '导': 2198, '玲': 2199, '蛇': 2200, '奂': 2201, '挫': 2202, '嘛': 2203, '腻': 2204, '雯': 2205, '阔': 2206, '实': 2207, '蛊': 2208, '叼': 2209, '经': 2210, '廊': 2211, '拓': 2212, '达': 2213, '混': 2214, '仆': 2215, '痕': 2216, '较': 2217, '信': 2218, '镌': 2219, '荣': 2220, '羊': 2221, '吴': 2222, '苟': 2223, '借': 2224, '郑': 2225, '祠': 2226, '喜': 2227, '歌': 2228, '况': 2229, '桉': 2230, '笔': 2231, '聆': 2232, '树': 2233, '啃': 2234, '飞': 2235, '从': 2236, '門': 2237, 'G': 2238, '仓': 2239, '位': 2240, '欣': 2241, '音': 2242, '扑': 2243, '❗': 2244, '透': 2245, '述': 2246, '報': 2247, '咎': 2248, '肌': 2249, '吊': 2250, '了': 2251, '贾': 2252, '半': 2253, '截': 2254, '‼': 2255, '允': 2256, '瞄': 2257, '奴': 2258, '鹿': 2259, '蓆': 2260, 'め': 2261, '故': 2262, '革': 2263, '循': 2264, '诩': 2265, '拉': 2266, '\ue112': 2267, '〜': 2268, '粘': 2269, '眨': 2270, '垮': 2271, '⒋': 2272, '≧': 2273, '呸': 2274, '量': 2275, '氰': 2276, '涩': 2277, '吁': 2278, '瑜': 2279, '有': 2280, '罚': 2281, '邢': 2282, '英': 2283, '鼠': 2284, '蜘': 2285, '⑦': 2286, '別': 2287, '際': 2288, '记': 2289, '麼': 2290, '城': 2291, '邊': 2292, '哉': 2293, '茹': 2294, '矣': 2295, '聞': 2296, '航': 2297, '瘙': 2298, '椅': 2299, '泰': 2300, '屬': 2301, '蹂': 2302, '咁': 2303, '躁': 2304, '|': 2305, '变': 2306, '胜': 2307, '调': 2308, '疆': 2309, '该': 2310, '亡': 2311, '晔': 
2312, '窒': 2313, '罡': 2314, '核': 2315, '·': 2316, '糠': 2317, '旨': 2318, '钱': 2319, '凰': 2320, '民': 2321, '祥': 2322, '洒': 2323, '锅': 2324, '悄': 2325, '迂': 2326, '器': 2327, '戳': 2328, '蒲': 2329, '诙': 2330, '喳': 2331, '為': 2332, '雨': 2333, '旻': 2334, '灼': 2335, '肝': 2336, '匠': 2337, '土': 2338, '琳': 2339, '惩': 2340, '・': 2341, '姐': 2342, '彩': 2343, '障': 2344, '進': 2345, '劵': 2346, '理': 2347, '沏': 2348, '外': 2349, '佛': 2350, 'か': 2351, '裝': 2352, '皙': 2353, '颇': 2354, '肪': 2355, '崔': 2356, '嚼': 2357, '讳': 2358, '救': 2359, '淮': 2360, '烁': 2361, '搂': 2362, '⒎': 2363, '臀': 2364, '💗': 2365, '诀': 2366, '踪': 2367, '辆': 2368, '殇': 2369, '岁': 2370, '猥': 2371, '墩': 2372, '晃': 2373, '渔': 2374, '腐': 2375, '觉': 2376, '吨': 2377, '芙': 2378, '🇸': 2379, '服': 2380, '需': 2381, 't': 2382, '琨': 2383, '丐': 2384, '昼': 2385, '兜': 2386, '事': 2387, '谬': 2388, '氛': 2389, '菠': 2390, '介': 2391, '径': 2392, '俐': 2393, '黯': 2394, '3': 2395, '陕': 2396, '➍': 2397, '蝙': 2398, '岐': 2399, '藝': 2400, '黏': 2401, '蓉': 2402, '陶': 2403, '准': 2404, '追': 2405, '衝': 2406, '雌': 2407, '沃': 2408, '關': 2409, '贝': 2410, 'd': 2411, '博': 2412, '速': 2413, '洁': 2414, '珐': 2415, '督': 2416, '瑞': 2417, '步': 2418, '嗯': 2419, '贸': 2420, '喀': 2421, '拟': 2422, '件': 2423, '💓': 2424, '生': 2425, '钨': 2426, '!': 2427, '機': 2428, '\ue41d': 2429, '皱': 2430, '族': 2431, '僭': 2432, '镐': 2433, '精': 2434, '艘': 2435, '镖': 2436, '曙': 2437, '扔': 2438, '😚': 2439, '勉': 2440, '疯': 2441, '赋': 2442, '騙': 2443, '徐': 2444, '塑': 2445, '凭': 2446, '人': 2447, '川': 2448, '\ue333': 2449, '弈': 2450, '賀': 2451, '党': 2452, '始': 2453, 'v': 2454, '腋': 2455, '致': 2456, '隊': 2457, '丸': 2458, '😭': 2459, '格': 2460, '幸': 2461, '與': 2462, '淌': 2463, '掩': 2464, '待': 2465, '于': 2466, '悍': 2467, '蹲': 2468, '难': 2469, '禺': 2470, '可': 2471, '義': 2472, '䄂': 2473, '谢': 2474, '咕': 2475, '毬': 2476, '喇': 2477, '戸': 2478, '魚': 2479, '娠': 2480, '圈': 2481, '弓': 2482, '蒋': 2483, '掘': 2484, '滾': 2485, '谶': 2486, '孱': 2487, '購': 2488, '躏': 2489, '呵': 2490, '焯': 2491, '\ue418': 2492, '仰': 2493, '密': 2494, '苗': 2495, '纠': 2496, '霆': 2497, '臥': 2498, '灬': 2499, '願': 2500, '荐': 2501, '惧': 2502, '兽': 2503, '渡': 2504, '酷': 2505, '森': 2506, '厘': 2507, '食': 2508, '办': 2509, '俞': 2510, '训': 2511, '灭': 2512, '婕': 2513, '袜': 2514, '罢': 2515, '旺': 2516, '瞥': 2517, '寧': 2518, '笨': 2519, '筷': 2520, '睦': 2521, '迪': 2522, '种': 2523, '題': 2524, '纲': 2525, '預': 2526, '螂': 2527, '醉': 2528, '息': 2529, '胭': 2530, '昕': 2531, '鲨': 2532, '衰': 2533, '逸': 2534, '享': 2535, '士': 2536, '纵': 2537, '莓': 2538, '顾': 2539, '孩': 2540, '拨': 2541, '乓': 2542, '吐': 2543, '显': 2544, '難': 2545, '泌': 2546, '舉': 2547, '剃': 2548, '∕': 2549, '無': 2550, '叔': 2551, '俗': 2552, '裕': 2553, '~': 2554, '讓': 2555, '卜': 2556, '奔': 2557, '凤': 2558, '畏': 2559, '6': 2560, '虐': 2561, '婆': 2562, '骆': 2563, '霧': 2564, '最': 2565, '缨': 2566, 'z': 2567, '晶': 2568, '粑': 2569, '觑': 2570, '砷': 2571, '劣': 2572, '濡': 2573, '骁': 2574, '附': 2575, '鱼': 2576, '综': 2577, '敷': 2578, '粟': 2579, 'x': 2580, '恩': 2581, '迫': 2582, 'з': 2583, '予': 2584, '谟': 2585, '辍': 2586, '螨': 2587, '幽': 2588, '讥': 2589, '填': 2590, '專': 2591, '报': 2592, '驴': 2593, '促': 2594, '语': 2595, '辣': 2596, '棵': 2597, '峙': 2598, '崎': 2599, '珑': 2600, '左': 2601, '東': 2602, '琥': 2603, '厢': 2604, '悦': 2605, '心': 2606, '莞': 2607, '☞': 2608, '阎': 2609, '琼': 2610, '赔': 2611, '厦': 2612, '瞑': 2613, '邃': 2614, '苍': 2615, '炉': 2616, '朗': 2617, '视': 2618, '劲': 2619, '臾': 2620, '颖': 2621, '哋': 2622, '堆': 2623, '课': 2624, '咪': 2625, '缘': 2626, '屍': 2627, '恻': 2628, '裹': 2629, '市': 2630, '魯': 2631, '卵': 2632, '扎': 2633, 
'钞': 2634, '禀': 2635, '瘋': 2636, '窿': 2637, '差': 2638, '脂': 2639, '化': 2640, '掺': 2641, '菩': 2642, '溟': 2643, '焰': 2644, '淳': 2645, '逢': 2646, '铎': 2647, '訂': 2648, '鬣': 2649, '括': 2650, '启': 2651, '吾': 2652, '输': 2653, '芽': 2654, '昆': 2655, '旦': 2656, '套': 2657, '韦': 2658, '姻': 2659, '弗': 2660, '戒': 2661, '遁': 2662, 'B': 2663, '蔬': 2664, '俠': 2665, '读': 2666, '早': 2667, '并': 2668, '三': 2669, '剿': 2670, '颈': 2671, '渭': 2672, '罒': 2673, '亭': 2674, '湛': 2675, '铛': 2676, '嗜': 2677, '巍': 2678, '讣': 2679, '恋': 2680, '酒': 2681, '蔓': 2682, '冠': 2683, '绚': 2684, '碉': 2685, '減': 2686, '抓': 2687, '眠': 2688, '%': 2689, 'q': 2690, '婚': 2691, '肛': 2692, '让': 2693, '梦': 2694, '李': 2695, '得': 2696, '乞': 2697, '赂': 2698, '圆': 2699, '擎': 2700, 'F': 2701, '务': 2702, '=': 2703, '解': 2704, '宴': 2705, '名': 2706, '鹂': 2707, '碑': 2708, '篮': 2709, '带': 2710, '议': 2711, '鲍': 2712, '慰': 2713, '舊': 2714, '感': 2715, '煥': 2716, '饰': 2717, '爆': 2718, '梁': 2719, '副': 2720, '米': 2721, '腹': 2722, '🐵': 2723, '耻': 2724, '赵': 2725, '蛛': 2726, '羯': 2727, '瑚': 2728, '忏': 2729, '箴': 2730, '驚': 2731, '除': 2732, '娃': 2733, '链': 2734, '嬉': 2735, '袱': 2736, '㎡': 2737, '噜': 2738, '中': 2739, '谐': 2740, '识': 2741, '禅': 2742, '秽': 2743, '眩': 2744, '彦': 2745, '塞': 2746, '摒': 2747, '魂': 2748, '秋': 2749, '铭': 2750, '\\': 2751, '泱': 2752, '胶': 2753, '樣': 2754, '妃': 2755, '厄': 2756, '尅': 2757, '术': 2758, '转': 2759, '途': 2760, '灯': 2761, '爹': 2762, '喻': 2763, '痒': 2764, '栎': 2765, '馬': 2766, '訓': 2767, '囂': 2768, '▽': 2769, '联': 2770, '熄': 2771, '周': 2772, '殷': 2773, '整': 2774, '睇': 2775, '便': 2776, '蜷': 2777, '硕': 2778, '彻': 2779, '试': 2780, '傭': 2781, '冼': 2782, '避': 2783, 'ノ': 2784, '镜': 2785, '瓣': 2786, '噤': 2787, '耐': 2788, '炸': 2789, '疾': 2790, '商': 2791, '愁': 2792, '腑': 2793, '吏': 2794, '贷': 2795, '算': 2796, '瞧': 2797, '孰': 2798, '婪': 2799, '氧': 2800, '详': 2801, '崛': 2802, '福': 2803, '营': 2804, '姓': 2805, '霾': 2806, '奈': 2807, '潜': 2808, '✨': 2809, '铱': 2810, '妝': 2811, '裸': 2812, '递': 2813, '番': 2814, '薇': 2815, '瑟': 2816, '挚': 2817, '默': 2818, '妍': 2819, '诽': 2820, '忠': 2821, '欠': 2822, '诋': 2823, '秘': 2824, '栗': 2825, '风': 2826, '跋': 2827, '師': 2828, '取': 2829, '灾': 2830, '瑪': 2831, '遏': 2832, '彝': 2833, '侦': 2834, '妩': 2835, '"': 2836, '院': 2837, '础': 2838, '藍': 2839, '也': 2840, '此': 2841, '灌': 2842, '兴': 2843, '覆': 2844, '馍': 2845, '公': 2846, '怎': 2847, '亚': 2848, '跳': 2849, '肠': 2850, '歡': 2851, '坡': 2852, '邂': 2853, '凹': 2854, '谁': 2855, '插': 2856, '荷': 2857, '琵': 2858, '兒': 2859, '槃': 2860, '芒': 2861, 'k': 2862, '豢': 2863, '她': 2864, '穿': 2865, '劈': 2866, '尴': 2867, '击': 2868, '滴': 2869, '茜': 2870, '募': 2871, '烙': 2872, '柱': 2873, '嘘': 2874, '夙': 2875, '】': 2876, '擇': 2877, '肢': 2878, '璐': 2879, '粮': 2880, '阻': 2881, '绞': 2882, '赤': 2883, '捂': 2884, '泵': 2885, '圃': 2886, '蓬': 2887, '赖': 2888, '悯': 2889, '底': 2890, '岩': 2891, '淤': 2892, '闲': 2893, '慶': 2894, '媛': 2895, '惕': 2896, '岂': 2897, '为': 2898, '贩': 2899, '田': 2900, '勒': 2901, '捅': 2902, '业': 2903, '黃': 2904, '话': 2905, '愛': 2906, '徒': 2907, '什': 2908, '屁': 2909, '孝': 2910, '胳': 2911, '闭': 2912, '雕': 2913, 'し': 2914, '卧': 2915, '农': 2916, '奥': 2917, '伟': 2918, '轰': 2919, '昏': 2920, '馥': 2921, '戚': 2922, '戶': 2923, '饿': 2924, '糸': 2925, '入': 2926, '逗': 2927, '豬': 2928, '波': 2929, '尋': 2930, '颠': 2931, '堂': 2932, '枚': 2933, '枝': 2934, '珉': 2935, '送': 2936, '脖': 2937, '成': 2938, '咬': 2939, '鲟': 2940, '抚': 2941, '与': 2942, '茬': 2943, '拱': 2944, '学': 2945, '?': 2946, '摸': 2947, '腌': 2948, '怒': 2949, '哗': 2950, '选': 2951, '眼': 2952, '芬': 2953, '罕': 2954, '创': 2955, '涂': 2956, 
'稻': 2957, '大': 2958, '腱': 2959, '辈': 2960, '億': 2961, '猴': 2962, '新': 2963, 'y': 2964, '射': 2965, '概': 2966, '娇': 2967, '败': 2968, '辞': 2969, '裱': 2970, '個': 2971, '额': 2972, '帖': 2973, '遂': 2974, '質': 2975, '頭': 2976, '绕': 2977, '噢': 2978, '래': 2979, '房': 2980, '丹': 2981, '条': 2982, '苒': 2983, '捐': 2984, '顶': 2985, '檬': 2986, '災': 2987, '返': 2988, '史': 2989, '逊': 2990, '糜': 2991, '题': 2992, '嫌': 2993, '蓝': 2994, '饲': 2995, '沙': 2996, '蘑': 2997, '雪': 2998, '材': 2999, '媚': 3000, '』': 3001, '葵': 3002, '妄': 3003, '穷': 3004, '贈': 3005, '焕': 3006, '嘱': 3007, '播': 3008, '援': 3009, '脸': 3010, '废': 3011, '菜': 3012, '糯': 3013, '-': 3014, '蘭': 3015, '!': 3016, '四': 3017, '临': 3018, '苹': 3019, '缕': 3020, '迄': 3021, '窗': 3022, '孤': 3023, '罹': 3024, '萄': 3025, '莹': 3026, '蜕': 3027, '遵': 3028, '橄': 3029, '乘': 3030, '那': 3031, '仿': 3032, '絲': 3033, '\ue109': 3034, '扫': 3035, '贫': 3036, '隅': 3037, '觎': 3038, '雲': 3039, '洛': 3040, '踢': 3041, '抛': 3042, '磁': 3043, '穆': 3044, '涛': 3045, 'H': 3046, '贼': 3047, '噩': 3048, '昭': 3049, '蝠': 3050, '墅': 3051, '屹': 3052, '堕': 3053, '祇': 3054, '靜': 3055, '禄': 3056, '购': 3057, '瑶': 3058, 'à': 3059, '言': 3060, '泽': 3061, '揚': 3062, '宣': 3063, '瀑': 3064, '书': 3065, '澈': 3066, '玑': 3067, '违': 3068, '劳': 3069, '較': 3070, '指': 3071, '詩': 3072, '纤': 3073, '笑': 3074, '華': 3075, '诗': 3076, '袂': 3077, '倪': 3078, '羞': 3079, '拾': 3080, '小': 3081, '¥': 3082, '轮': 3083, '纽': 3084, '蹬': 3085, '惯': 3086, '➌': 3087, '下': 3088, '宽': 3089, '好': 3090, '店': 3091, '芝': 3092, '藻': 3093, '暑': 3094, '跑': 3095, '褐': 3096, '響': 3097, '、': 3098, '☑': 3099, '短': 3100, '晚': 3101, '挪': 3102, '⒏': 3103, '哕': 3104, '形': 3105, '陪': 3106, '芭': 3107, '枣': 3108, '總': 3109, '〞': 3110, '涅': 3111, '但': 3112, '影': 3113, '据': 3114, '笫': 3115, '港': 3116, '月': 3117, '版': 3118, '彷': 3119, '柴': 3120, '阿': 3121, '玩': 3122, '损': 3123, '结': 3124, '虎': 3125, '殖': 3126, '韓': 3127, '鯉': 3128, '歇': 3129, '屯': 3130, '句': 3131, '坊': 3132, '酸': 3133, '某': 3134, '屏': 3135, '養': 3136, '迟': 3137, '萌': 3138, '产': 3139, '减': 3140, '嘍': 3141, '颚': 3142, '遇': 3143, '倦': 3144, '嘶': 3145, '獻': 3146, '枫': 3147, '置': 3148, '钗': 3149, '响': 3150, '奘': 3151, '现': 3152, '➏': 3153, '消': 3154, '屋': 3155, '粗': 3156, '痊': 3157, '狈': 3158, '海': 3159, '卓': 3160, '郭': 3161, '帛': 3162, '过': 3163, '坤': 3164, '晗': 3165, '杨': 3166, '賓': 3167, '岼': 3168, '嘿': 3169, '辉': 3170, '蜡': 3171, '愣': 3172, '伐': 3173, '张': 3174, '帆': 3175, '龈': 3176, '害': 3177, '團': 3178, '重': 3179, '自': 3180, '剧': 3181, '骂': 3182, '亲': 3183, '践': 3184, '寡': 3185, '荫': 3186, '用': 3187, '系': 3188, '\u200b': 3189, '橙': 3190, '愉': 3191, '缉': 3192, '哦': 3193, '窟': 3194, '砖': 3195, '鴻': 3196, '体': 3197, '空': 3198, '汉': 3199, '阅': 3200, '淡': 3201, '祭': 3202, '痈': 3203, '映': 3204, '卡': 3205, '牠': 3206, '夕': 3207, '财': 3208, '豊': 3209, '麟': 3210, '贵': 3211, 'X': 3212, '驼': 3213, '脱': 3214, '¥': 3215, '@': 3216, '(': 3217, '矛': 3218, '瓷': 3219, '汨': 3220, '框': 3221, '悱': 3222, '竖': 3223, '宾': 3224, '霸': 3225, '坟': 3226, '栋': 3227, 'a': 3228, '同': 3229, '正': 3230, '片': 3231, 'b': 3232, '边': 3233, '樱': 3234, '畑': 3235, '要': 3236, '斯': 3237, '咯': 3238, '的': 3239, '亦': 3240, '摊': 3241, '赁': 3242, '續': 3243, '呻': 3244, '司': 3245, '摆': 3246, '绳': 3247, '唠': 3248, '嬷': 3249, '煌': 3250, '章': 3251, '翅': 3252, '\': 3253, '腿': 3254, '棘': 3255, '老': 3256, '{': 3257, '姬': 3258, '惶': 3259, '晴': 3260, '兮': 3261, '咏': 3262, '号': 3263, '漠': 3264, '厅': 3265, '匙': 3266, '議': 3267, '滥': 3268, '飆': 3269, '锤': 3270, '屎': 3271, '幕': 3272, '祝': 3273, '阴': 3274, '盟': 3275, '壤': 3276, '胸': 3277, '妓': 3278, 
'囉': 3279, '瑕': 3280, '阮': 3281, '㎝': 3282, '峰': 3283, '溧': 3284, '轺': 3285, '止': 3286, '浩': 3287, '趕': 3288, '衛': 3289, '遷': 3290, '奶': 3291, '供': 3292, '这': 3293, '現': 3294, '塌': 3295, '慎': 3296, '提': 3297, '良': 3298, '津': 3299, '威': 3300, '州': 3301, '售': 3302, '筒': 3303, '┮': 3304, '🇺': 3305, ')': 3306, '溺': 3307, '春': 3308, '鳥': 3309, '驳': 3310, '辖': 3311, '苛': 3312, '赘': 3313, '敏': 3314, '飘': 3315, '筹': 3316, '激': 3317, '毫': 3318, '掀': 3319, '宇': 3320, '稿': 3321, '瘪': 3322, '誕': 3323, '✅': 3324, '赐': 3325, '恳': 3326, '岭': 3327, '白': 3328, '声': 3329, '村': 3330, '頁': 3331, '淚': 3332, '鲵': 3333, '恪': 3334, '错': 3335, '香': 3336, '靶': 3337, '骨': 3338, '雄': 3339, '萍': 3340, '昊': 3341, 'リ': 3342, '五': 3343, '挟': 3344, '鉛': 3345, '滨': 3346, '漱': 3347, '喷': 3348, '油': 3349, '状': 3350, '髓': 3351, '丰': 3352, '培': 3353, '裁': 3354, '繹': 3355, '蔑': 3356, '棉': 3357, '泼': 3358, '③': 3359, '掐': 3360, '喺': 3361, '克': 3362, '硬': 3363, '闪': 3364, '伺': 3365, '褪': 3366, '猬': 3367, '哭': 3368, '費': 3369, '薛': 3370, '淫': 3371, '矜': 3372, '丑': 3373, '清': 3374, '馋': 3375, '伍': 3376, '预': 3377, '駿': 3378, '丶': 3379, '其': 3380, '潸': 3381, '辗': 3382, '妮': 3383, '未': 3384, '疑': 3385, '盖': 3386, '刻': 3387, '悼': 3388, '◆': 3389, '评': 3390, '籍': 3391, '巨': 3392, '迅': 3393, '秒': 3394, '斩': 3395, '◇': 3396, '胀': 3397, '杀': 3398, '杭': 3399, '萨': 3400, '鑿': 3401, '該': 3402, '郁': 3403, '换': 3404, '距': 3405, '茨': 3406, '搁': 3407, '歹': 3408, '帕': 3409, '劉': 3410, '缔': 3411, '漢': 3412, '裡': 3413, '屡': 3414, '[': 3415, '毛': 3416, '誉': 3417, '涯': 3418, '儿': 3419, '躯': 3420, '驶': 3421, '荼': 3422, '啫': 3423, '彤': 3424, '烤': 3425, '收': 3426, '瓜': 3427, '侈': 3428, '斗': 3429, '里': 3430, '辩': 3431, '熙': 3432, '采': 3433, '忧': 3434, '穴': 3435, '符': 3436, '免': 3437, '握': 3438, '請': 3439, '鸠': 3440, '慈': 3441, '廈': 3442, '抬': 3443, '嚴': 3444, '身': 3445, '虔': 3446, '然': 3447, '斋': 3448, '控': 3449, '患': 3450, '飛': 3451, '赃': 3452, '撵': 3453, '燥': 3454, '舜': 3455, '國': 3456, '膝': 3457, '羅': 3458, '葱': 3459, '汀': 3460, '乖': 3461, '蛟': 3462, '露': 3463, '梆': 3464, '麽': 3465, '医': 3466, '條': 3467, '板': 3468, '割': 3469, '祖': 3470, '钢': 3471, '渺': 3472, '點': 3473, '惰': 3474, '戏': 3475, '具': 3476, '延': 3477, '刹': 3478, '塘': 3479, '铅': 3480, '诊': 3481, '凝': 3482, '綸': 3483, '☆': 3484, '壶': 3485, '計': 3486, '锋': 3487, '在': 3488, '颤': 3489, '伯': 3490, '固': 3491, '①': 3492, '游': 3493, '囚': 3494, '帼': 3495, '每': 3496, '亮': 3497, '蚊': 3498, '而': 3499, 'Q': 3500, '奢': 3501, '赠': 3502, '檔': 3503, '含': 3504, '继': 3505, '蛙': 3506, '顷': 3507, '艰': 3508, '撮': 3509, '`': 3510, '怕': 3511, '夺': 3512, '咳': 3513, '認': 3514, '隐': 3515, '⒈': 3516, '②': 3517, '蜃': 3518, '衬': 3519, '喬': 3520, '牲': 3521, '淇': 3522, '私': 3523, '哲': 3524, '雙': 3525, '痪': 3526, '嵘': 3527, '晕': 3528, '撒': 3529, '莉': 3530, '霍': 3531, '園': 3532, '摧': 3533, '➎': 3534, '艱': 3535, '🍀': 3536, '姆': 3537, '谍': 3538, '军': 3539, '越': 3540, '撰': 3541, '双': 3542, '唯': 3543, '嘻': 3544, '狗': 3545, '襄': 3546, ']': 3547, '脚': 3548, '貴': 3549, '湊': 3550, '懊': 3551, '斜': 3552, ',': 3553, '智': 3554, '蠢': 3555, '幅': 3556, '惨': 3557, '俺': 3558, '膀': 3559, '年': 3560, '震': 3561, '禁': 3562, '桌': 3563, '⋯': 3564, '厂': 3565, 'と': 3566, '翁': 3567, '瓯': 3568, '花': 3569, '詞': 3570, 'j': 3571, '战': 3572, '魇': 3573, '舒': 3574, '雹': 3575, '主': 3576, '鄉': 3577, '❀': 3578, '惹': 3579, '扰': 3580, '棍': 3581, '啥': 3582, '柿': 3583, '坠': 3584, '译': 3585, '泓': 3586, '否': 3587, '粒': 3588, '酝': 3589, '敗': 3590, '猿': 3591, '跃': 3592, '泉': 3593, '饕': 3594, '狮': 3595, '浪': 3596, '背': 3597, '至': 3598, '罂': 3599, '岚': 3600, '骑': 3601, 
'苏': 3602, '测': 3603, '仔': 3604, '>': 3605, '}': 3606, '毅': 3607, '突': 3608, '数': 3609, '齐': 3610, 'n': 3611, '丙': 3612, '敢': 3613, '掠': 3614, '犀': 3615, '码': 3616, '盒': 3617, '雜': 3618, '析': 3619, '乔': 3620, '🐒': 3621, '蒜': 3622, '♪': 3623, '架': 3624, '脐': 3625, '倩': 3626, '刘': 3627, '馄': 3628, '扳': 3629, '销': 3630, '彈': 3631, '滚': 3632, ']': 3633, '豌': 3634, '規': 3635, '羡': 3636, '佣': 3637, '讶': 3638, '代': 3639, '裳': 3640, '疤': 3641, '哪': 3642, '何': 3643, '聋': 3644, '绩': 3645, '發': 3646, '振': 3647, '鎮': 3648, '户': 3649, '亟': 3650, '虾': 3651, '沦': 3652, '泛': 3653, '淑': 3654, '寰': 3655, '黛': 3656, '溫': 3657, '粽': 3658, '溢': 3659, '蠻': 3660, '廿': 3661, '類': 3662, '椎': 3663, '扼': 3664, '😱': 3665, 'Z': 3666, '麦': 3667, '西': 3668, '卫': 3669, '瞻': 3670, '舵': 3671, '2': 3672, '富': 3673, '暹': 3674, '道': 3675, '渣': 3676, '查': 3677, '命': 3678, '噗': 3679, '令': 3680, '请': 3681, '腾': 3682, '决': 3683, '搡': 3684, '帶': 3685, '娉': 3686, '膏': 3687, '展': 3688, '累': 3689, '眉': 3690, '壁': 3691, '剎': 3692, '睾': 3693, '很': 3694, '八': 3695, '蟒': 3696, '茶': 3697, '朩': 3698, '銳': 3699, '描': 3700, '快': 3701, '嫂': 3702, '厚': 3703, '④': 3704, '≫': 3705, '陵': 3706, '签': 3707, '诬': 3708, '由': 3709, '马': 3710, '昂': 3711, '溪': 3712, '石': 3713, '暂': 3714, 's': 3715, '橡': 3716, '运': 3717, '漫': 3718, '刮': 3719, '呗': 3720, '綦': 3721, '勘': 3722, '亩': 3723, '布': 3724, '盈': 3725, '谛': 3726, '嗽': 3727, '罗': 3728, '宝': 3729, '痺': 3730, '漂': 3731, 'Y': 3732, '凉': 3733, '胆': 3734, '․': 3735, '婉': 3736, '艇': 3737, '鳗': 3738, '幹': 3739, '碧': 3740, '們': 3741, '催': 3742, '´': 3743, '讹': 3744, '隣': 3745, 'T': 3746, '骼': 3747, '颁': 3748, '罄': 3749, '木': 3750, '慢': 3751, '腫': 3752, '度': 3753, '恐': 3754, '百': 3755, '鹏': 3756, 'u': 3757, '往': 3758, ':': 3759, '模': 3760, '魔': 3761, '十': 3762, '郎': 3763, '讽': 3764, '婀': 3765, '揭': 3766, '耽': 3767, '栏': 3768, '绣': 3769, '頻': 3770, '拥': 3771, '層': 3772, '面': 3773, '酱': 3774, '😲': 3775, '書': 3776, '睽': 3777, '偷': 3778, '兔': 3779, '叛': 3780, '肯': 3781, '衫': 3782, '集': 3783, '络': 3784, '类': 3785, '翰': 3786, '磊': 3787, '牡': 3788, '氯': 3789, '特': 3790, '标': 3791, 'W': 3792, '妨': 3793, '效': 3794, '冀': 3795, '召': 3796, '政': 3797, '囧': 3798, '惜': 3799, '讪': 3800, '磨': 3801, '深': 3802, '璧': 3803, '犹': 3804, '瘤': 3805, '餐': 3806, '挽': 3807, '吉': 3808, '廷': 3809, '呲': 3810, '訊': 3811, '酗': 3812, '佬': 3813, '酶': 3814, '轨': 3815, '型': 3816, '偕': 3817, '诵': 3818, '漯': 3819, '似': 3820, '嗦': 3821, '乃': 3822, '梅': 3823, '⑧': 3824, '靖': 3825, '票': 3826, '滿': 3827, '色': 3828, '址': 3829, 'r': 3830, '屑': 3831, '衣': 3832, '%': 3833, '咋': 3834, '棚': 3835, '_': 3836, '帅': 3837, '娑': 3838, '窕': 3839, '拜': 3840, '酵': 3841, '埔': 3842, '茅': 3843, '他': 3844, '見': 3845, '操': 3846, '等': 3847, '境': 3848, '叉': 3849, '遭': 3850, '札': 3851, '来': 3852, '水': 3853, '鄭': 3854, '历': 3855, '劫': 3856, '署': 3857, '孙': 3858, '红': 3859, '养': 3860, '壳': 3861, '艳': 3862, '捣': 3863, '饶': 3864, '恤': 3865, '醋': 3866, '憐': 3867, '植': 3868, '翱': 3869, '辅': 3870, '蛋': 3871, '鄂': 3872, '媳': 3873, '泣': 3874, '替': 3875, '猎': 3876, '憔': 3877, '晋': 3878, '韌': 3879, '统': 3880, '雍': 3881, '翡': 3882, '偶': 3883, '弥': 3884, '兩': 3885, '戀': 3886, '嗎': 3887, '≦': 3888, '烫': 3889, '😢': 3890, '聪': 3891, '﹏': 3892, '佟': 3893, '厉': 3894, '甸': 3895, '普': 3896, '轴': 3897, '寅': 3898, '优': 3899, '坑': 3900, '哼': 3901, '拆': 3902, '验': 3903, '内': 3904, 'U': 3905, '婵': 3906, '搭': 3907, '時': 3908, 'D': 3909, '颜': 3910, '繼': 3911, '坞': 3912, '斷': 3913, '咱': 3914, '諒': 3915, '郸': 3916, '康': 3917, '六': 3918, '娶': 3919, '獸': 3920, '巩': 3921, '睁': 3922, '奇': 3923, '汁': 3924, 
'拿': 3925, '黔': 3926, '捍': 3927, '溶': 3928, '瓢': 3929, '阁': 3930, '阂': 3931, '蟑': 3932, '瑋': 3933, '谣': 3934, '去': 3935, '悸': 3936, '麥': 3937, '創': 3938, '袋': 3939, '立': 3940, '册': 3941, '榴': 3942, '荏': 3943, '乱': 3944, '常': 3945, '淹': 3946, '育': 3947, '藤': 3948, '汰': 3949, '缢': 3950, '倒': 3951, '偏': 3952, '瘫': 3953, '凡': 3954, ';': 3955, '辐': 3956, '诱': 3957, '忙': 3958, '熟': 3959, '零': 3960, '荒': 3961, '庵': 3962, '江': 3963, '逍': 3964, '煽': 3965, '佩': 3966, '凸': 3967, '泊': 3968, '巷': 3969, '凯': 3970, '丞': 3971, '學': 3972, '騰': 3973, '碾': 3974, '萱': 3975, '钓': 3976, '勿': 3977, '煤': 3978, '扈': 3979, '灰': 3980, '烹': 3981, '磐': 3982, '冻': 3983, '围': 3984, '筝': 3985, '嫡': 3986, '耶': 3987, '矫': 3988, '鼻': 3989, '粉': 3990, '踹': 3991, '捡': 3992, '赚': 3993, '绍': 3994, '泪': 3995, '善': 3996, '弟': 3997, '萃': 3998, '诶': 3999, '試': 4000, '垂': 4001, '庭': 4002, '费': 4003, '乡': 4004, '礁': 4005, '申': 4006, '呜': 4007, '坷': 4008, '坝': 4009, '飒': 4010, '证': 4011, '扮': 4012, '痿': 4013, '阐': 4014, '庚': 4015, '1': 4016, '问': 4017, '5': 4018, '俱': 4019, '祺': 4020, '嫩': 4021, '礼': 4022, '琶': 4023, '疫': 4024, '针': 4025, '盡': 4026, '汇': 4027, '暧': 4028, '乐': 4029, '尾': 4030, '德': 4031, '膜': 4032, '湖': 4033, '缪': 4034, '极': 4035, '☎': 4036, '獒': 4037, '恶': 4038, '熹': 4039, '谠': 4040, '凄': 4041, '买': 4042, '午': 4043, '狞': 4044, '伸': 4045, '贪': 4046, '兵': 4047, '唁': 4048, '察': 4049, '燕': 4050, '浏': 4051, '剛': 4052, '龟': 4053, '浅': 4054, '橇': 4055, '艹': 4056, '薄': 4057, '扛': 4058, '绛': 4059, '委': 4060, '勢': 4061, '憾': 4062, '污': 4063, '螃': 4064, '郊': 4065, '"': 4066, '官': 4067, '虽': 4068, '啤': 4069, '诲': 4070, '蓄': 4071, '喘': 4072, '软': 4073, '排': 4074, '遠': 4075, '彭': 4076, '倾': 4077, '授': 4078, '眸': 4079, 'p': 4080, '遮': 4081, '恒': 4082, '师': 4083, '崇': 4084, '般': 4085, '琐': 4086, '责': 4087, '宗': 4088, '呆': 4089, '鳌': 4090, '处': 4091, '攻': 4092, '钥': 4093, '松': 4094, '醺': 4095, '鼎': 4096, '储': 4097, '陌': 4098, '咲': 4099, '3': 4100, '幂': 4101, '恣': 4102, '谓': 4103, '過': 4104, '緊': 4105, '咨': 4106, '宵': 4107, '抖': 4108, '鑑': 4109, '到': 4110, '盔': 4111, '望': 4112, '浑': 4113, '给': 4114, '剪': 4115, '妙': 4116, '僵': 4117, '饱': 4118, '岳': 4119, '髮': 4120, '怺': 4121, '工': 4122, '鸦': 4123, '渐': 4124, '驾': 4125, '娛': 4126, '葛': 4127, '風': 4128, '愈': 4129, '糊': 4130, '週': 4131, '洲': 4132, '颂': 4133, '曲': 4134, '助': 4135, '懂': 4136, '王': 4137, '妻': 4138, '俚': 4139, '肋': 4140, '潼': 4141, '氓': 4142, '袭': 4143, '&': 4144, '🇨': 4145, '草': 4146, '広': 4147, '子': 4148, '🌟': 4149, '呈': 4150, '景': 4151, '二': 4152, '捕': 4153, '绒': 4154, '忍': 4155, '迎': 4156, '礴': 4157, '瘾': 4158, '序': 4159, '7': 4160, '胧': 4161, '锢': 4162, 'f': 4163, '掇': 4164, '咻': 4165, '吝': 4166, '寶': 4167, '氏': 4168, '窝': 4169, '阵': 4170, '坚': 4171, '疲': 4172, '兼': 4173, '皆': 4174, '攒': 4175, '酣': 4176, '仪': 4177, '變': 4178, '桂': 4179, '兆': 4180, '昶': 4181, '装': 4182, '尖': 4183, 'L': 4184, '瓶': 4185, '稀': 4186, '诡': 4187, '妒': 4188, '裂': 4189, '弦': 4190, '翔': 4191, '葬': 4192, '馈': 4193, '扉': 4194, '囔': 4195, '喧': 4196, '盛': 4197, '笛': 4198, '態': 4199, '町': 4200, '餮': 4201, '钛': 4202, '🍁': 4203, '灣': 4204, '鬥': 4205, '嵯': 4206, '粥': 4207, '慵': 4208, '如': 4209, '葆': 4210, '記': 4211, '足': 4212, '约': 4213, '屌': 4214, '移': 4215, '门': 4216, '詹': 4217, '價': 4218, '闽': 4219, '屆': 4220, '碱': 4221, '袖': 4222, '長': 4223, '画': 4224, '余': 4225, '琢': 4226, '帐': 4227, '嚎': 4228, '留': 4229, '跚': 4230, '床': 4231, '刚': 4232, '哒': 4233, '鸽': 4234, '知': 4235, '块': 4236, '杉': 4237, '尼': 4238, '’': 4239, '敛': 4240, '涨': 4241, '橫': 4242, '思': 4243, '媒': 4244, '朝': 4245, '輝': 4246, '例': 4247, 
'押': 4248, '槽': 4249, '挑': 4250, '狭': 4251, '間': 4252, '前': 4253, '考': 4254, '娱': 4255, '械': 4256, '✈': 4257, '嗓': 4258, '斥': 4259, '【': 4260, '紐': 4261, '罪': 4262, '皈': 4263, '长': 4264, '仇': 4265, '捭': 4266, '猜': 4267, 'm': 4268, '罩': 4269, '逾': 4270, '宜': 4271, '光': 4272, '后': 4273, '撑': 4274, '剖': 4275, '盆': 4276, '️': 4277, '峭': 4278, '牵': 4279, '砍': 4280, '沂': 4281, 'れ': 4282, '樊': 4283, '贺': 4284, '略': 4285, '🇳': 4286, '—': 4287, '吓': 4288, '拣': 4289, '亵': 4290, '静': 4291, '谴': 4292, '鬧': 4293, '論': 4294, '耿': 4295, '护': 4296, '苦': 4297, '艾': 4298, '∠': 4299, '猝': 4300, 'P': 4301, '黄': 4302, '君': 4303, 'こ': 4304, '弛': 4305, '恙': 4306, '笼': 4307, '柬': 4308, '猛': 4309, '酯': 4310, '划': 4311, '肖': 4312, '撬': 4313, '郫': 4314, '~': 4315, '缸': 4316, '種': 4317, '崭': 4318, '毗': 4319, '薯': 4320, '粪': 4321, '俭': 4322, '篷': 4323, '萤': 4324, '標': 4325, '糖': 4326, '裆': 4327, '熬': 4328, '一': 4329, '库': 4330, '▲': 4331, '冥': 4332, '锁': 4333, '俘': 4334, '抢': 4335, '征': 4336, '玫': 4337, '厲': 4338, '芯': 4339, '众': 4340, '吗': 4341, '歧': 4342, '楊': 4343, '篱': 4344, '夹': 4345, '悴': 4346, ';': 4347, '菁': 4348, '示': 4349, '衍': 4350, '抽': 4351, '纯': 4352, '您': 4353, '答': 4354, '法': 4355, '>': 4356, '窜': 4357, '坎': 4358, '柠': 4359, 'ら': 4360, '給': 4361, '♥': 4362, '噪': 4363, '⚫': 4364, '枕': 4365, '榆': 4366, '樂': 4367, '气': 4368, '末': 4369, '這': 4370, '矿': 4371, '員': 4372, '蚤': 4373, '梯': 4374, '通': 4375, '脆': 4376, '聲': 4377, '0': 4378, '弹': 4379, '怖': 4380, '俨': 4381, '域': 4382, '冉': 4383, '痹': 4384, '府': 4385, '啡': 4386, '绽': 4387, '頒': 4388, '辦': 4389, '发': 4390, '碌': 4391, '社': 4392, '🚬': 4393, '渗': 4394, '珠': 4395, '兄': 4396, '鸿': 4397, '哺': 4398, '俯': 4399, '妇': 4400, '蒙': 4401, '幢': 4402, '叽': 4403, '幡': 4404, '鎖': 4405, '安': 4406, '作': 4407, '情': 4408, '': 4409} \ No newline at end of file diff --git a/modules/text/text_generation/Rumor_prediction/module.py b/modules/text/text_generation/Rumor_prediction/module.py new file mode 100644 index 00000000..24799f1d --- /dev/null +++ b/modules/text/text_generation/Rumor_prediction/module.py @@ -0,0 +1,152 @@ +# coding:utf-8 +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
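+# Rumor_prediction is a PaddleHub module that wraps an exported PaddlePaddle
+# inference model ("infer_model") together with the character dictionary
+# "dict.txt" in this directory, and classifies Chinese text as '谣言' (rumor)
+# or '非谣言' (non-rumor).
+# Typical usage, as documented in README.md of this module:
+#   import paddlehub as hub
+#   module = hub.Module(name="Rumor_prediction")
+#   results = module.Rumor(texts=["..."], use_gpu=False)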
+import argparse
+import ast
+import os
+import math
+import six
+import time
+from pathlib import Path
+
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import runnable, serving, moduleinfo
+from paddlehub.io.parser import txt_parser
+from paddlehub.compat.module.nlp_module import DataFormatError
+import numpy as np
+import paddle
+import paddlehub as hub
+
+@moduleinfo(
+    name="Rumor_prediction",
+    version="1.0.0",
+    type="nlp/semantic_model",
+    summary="Predict whether the input text is a rumor.",
+    author="彭兆帅,郑博培",
+    author_email="1084667371@qq.com,2733821739@qq.com")
+class Rumorprediction(hub.Module):
+    def _initialize(self):
+        """
+        Initialize with the necessary elements.
+        """
+        # Path of the exported inference model shipped with this module.
+        self.default_pretrained_model_path = os.path.join(self.directory, "infer_model")
+
+    def Rumor(self, texts, use_gpu=False):
+        """
+        Predict whether the input texts are rumors.
+
+        Args:
+            texts (list[str]): sentences to be classified.
+            use_gpu (bool): whether to use GPU for prediction.
+
+        Returns:
+            results (list[dict]): each dict holds 'content', 'prediction' and 'probability'.
+        """
+
+        # Convert a sentence into a list of character ids using dict.txt.
+        def get_data(sentence):
+            # Load the character-to-id dictionary.
+            with open(self.directory + '/dict.txt', 'r', encoding='utf-8') as f_data:
+                dict_txt = eval(f_data.readlines()[0])
+            dict_txt = dict(dict_txt)
+            # Map every character to its id; unknown characters fall back to the '' entry.
+            keys = dict_txt.keys()
+            data = []
+            for s in sentence:
+                if s not in keys:
+                    s = ''
+                data.append(int(dict_txt[s]))
+            return data
+
+        data = []
+        for text in texts:
+            text = get_data(text)
+            data.append(text)
+        base_shape = [[len(c) for c in data]]
+        paddle.enable_static()
+        place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace()
+        exe = paddle.static.Executor(place)
+        exe.run(paddle.static.default_startup_program())
+        [infer_program, feeded_var_names, target_var] = paddle.fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+        # Build the LoD tensor that feeds the inference program.
+        tensor_words = paddle.fluid.create_lod_tensor(data, base_shape, place)
+        # Run inference.
+        result = exe.run(program=infer_program,
+                         feed={feeded_var_names[0]: tensor_words},
+                         fetch_list=target_var)
+        # Class names returned to the caller: '谣言' (rumor), '非谣言' (non-rumor).
+        names = ['谣言', '非谣言']
+
+        results = []
+        # For each input, take the label with the highest probability.
+        for i in range(len(data)):
+            content = texts[i]
+            lab = np.argsort(result)[0][i][-1]
+            results.append({
+                'content': content,
+                'prediction': names[lab],
+                'probability': result[0][i][lab]
+            })
+
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu',
+            type=ast.literal_eval,
+            default=False,
+            help="whether to use GPU for prediction")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+        """
+        self.arg_input_group.add_argument(
+            '--input_text',
+            type=str,
+            default=None,
+            help="text to be classified")
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description='Run the %s module.' % self.name,
+            prog='hub run %s' % self.name,
+            usage='%(prog)s',
+            add_help=True)
+
+        self.arg_input_group = self.parser.add_argument_group(
+            title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options",
+            description="Run configuration for controlling module behavior, optional.")
+
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+
+        args = self.parser.parse_args(argvs)
+        input_text = [args.input_text]
+        results = self.Rumor(texts=input_text, use_gpu=args.use_gpu)
+
+        return results
diff --git a/modules/text/text_generation/ernie_gen_leave/README.md b/modules/text/text_generation/ernie_gen_leave/README.md
new file mode 100644
index 00000000..ddde23ca
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_leave/README.md
@@ -0,0 +1,52 @@
+## Overview
+
+
+ernie_gen_leave is a model fine-tuned from ERNIE-GEN. Its main function is generating leave notes: give it an input keyword and it produces reasons for your leave request.
+
+## Command-Line Prediction
+
+```shell
+$ hub run ernie_gen_leave --input_text="理由" --use_gpu True --beam_width 5
+```
+
+## API
+
+```python
+def generate(texts, use_gpu=False, beam_width=5):
+```
+
+Prediction API that turns input keywords into leave-request reasons.
+
+**Parameters**
+
+* texts (list\[str\]): keywords for the leave request;
+* use\_gpu (bool): whether to use the GPU; **if the GPU is used, please set the CUDA\_VISIBLE\_DEVICES environment variable first**;
+* beam\_width (int): beam search width, which determines how many candidate reasons are returned.
+
+**Returns**
+
+* results (list\[list\[str\]\]): the generated leave-request reasons.
+
+**Code Example**
+
+```python
+import paddlehub as hub
+
+module = hub.Module(name="ernie_gen_leave")
+
+test_texts = ["理由"]
+results = module.generate(texts=test_texts, use_gpu=False, beam_width=2)
+for result in results:
+    print(result)
+```
+
+
+## Source Code
+
+https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-rc/modules/text/text_generation/ernie_gen_leave
+
+### Dependencies
+
+paddlepaddle >= 2.0.0rc1
+
+paddlehub >= 2.0.0rc0
diff --git a/modules/text/text_generation/ernie_gen_leave/model/decode.py b/modules/text/text_generation/ernie_gen_leave/model/decode.py
new file mode 100644
index 00000000..a42bfee1
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_leave/model/decode.py
@@ -0,0 +1,305 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import numpy as np
+from collections import namedtuple
+
+import paddle.fluid as F
+import paddle.fluid.layers as L
+import paddle.fluid.dygraph as D
+
+
+def gen_bias(encoder_inputs, decoder_inputs, step):
+    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
+    attn_bias = L.reshape(
+        L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
+    decoder_bias = L.cast(
+        (L.matmul(attn_bias, 1. 
/ attn_bias, transpose_y=True) >= 1.), + 'float32') #[1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze( + L.cast(L.ones_like(encoder_inputs), 'float32'), + [1]) #[bsz, 1, encoderlen] + encoder_bias = L.expand( + encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, + [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([ + encoder_bias, + L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias + ], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable( + np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, + L.ones_like(ids) * tgt_type_id, + pos_ids=pos_ids, + attn_bias=bias, + past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [ + L.concat([pk, k[:, :1, :]], 1) + for pk, k in zip(past_cached_k, cached_k) + ] # concat cached + cached_v = [ + L.concat([pv, v[:, :1, :]], 1) + for pv, v in zip(past_cached_v, cached_v) + ] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', + ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', + ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + ( + 1. - is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty) + return log_probs / lp + + +def beam_search_step(state, logits, eos_id, beam_width, is_first_step, + length_penalty): + """logits.shape == [B*W, V]""" + beam_size, vocab_size = logits.shape # as batch size=1 in this hub module. 
the first dim means bsz * beam_size equals beam_size + logits_np = logits.numpy() + for i in range(beam_size): + logits_np[i][17963] = 0 # make [UNK] prob = 0 + logits = D.to_variable(logits_np) + + bsz, beam_width = state.log_probs.shape + onehot_eos = L.cast( + F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] + + probs = L.log(L.softmax(logits)) #[B*W, V] + probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] + allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] + + not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] + not_eos = 1 - onehot_eos + length_to_add = not_finished * not_eos #[B*W,V] + alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add + + allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) + alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) + allscore = hyp_score(allprobs, alllen, length_penalty) + if is_first_step: + allscore = L.reshape( + allscore, + [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 + scores, idx = L.topk(allscore, k=beam_width) #[B, W] + next_beam_id = idx // vocab_size #[B, W] + next_word_id = idx % vocab_size + + gather_idx = L.concat([L.where(idx != -1)[:, :1], + L.reshape(idx, [-1, 1])], 1) + next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) + next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) + + gather_idx = L.concat( + [L.where(next_beam_id != -1)[:, :1], + L.reshape(next_beam_id, [-1, 1])], 1) + next_finished = L.reshape( + L.gather_nd(state.finished, gather_idx), + state.finished.shape) #[gather new beam state according to new beam id] + + next_finished += L.cast(next_word_id == eos_id, 'int64') + next_finished = L.cast(next_finished > 0, 'int64') + + next_state = BeamSearchState( + log_probs=next_probs, lengths=next_len, finished=next_finished) + output = BeamSearchOutput( + scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) + + return output, next_state + + +@D.no_grad +def beam_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + beam_width=5, + tgt_type_id=3, + length_penalty=1.0): + model.eval() + _, __, info = model(q_ids, q_sids) + d_batch, d_seqlen = q_ids.shape + + state = BeamSearchState( + log_probs=L.zeros([d_batch, beam_width], 'float32'), + lengths=L.zeros([d_batch, beam_width], 'int64'), + finished=L.zeros([d_batch, beam_width], 'int64')) + outputs = [] + + def reorder_(t, parent_id): + """reorder cache according to parent beam id""" + gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape( + parent_id, [-1]) + t = L.gather(t, gather_idx) + return t + + def tile_(t, times): + _shapes = list(t.shape[1:]) + ret = L.reshape( + L.expand(L.unsqueeze(t, [1]), [ + 1, + times, + ] + [ + 1, + ] * len(_shapes)), [ + -1, + ] + _shapes) + return ret + + cached_k, cached_v = info['caches'] + cached_k = [tile_(k, beam_width) for k in cached_k] + cached_v = [tile_(v, beam_width) for v in cached_v] + past_cache = (cached_k, cached_v) + + q_ids = tile_(q_ids, beam_width) + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + + cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id + attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable( + np.tile( + np.array([[step, step + 1]], dtype=np.int64), + [d_batch * beam_width, 1])) + pos_ids += 
seqlen + + _, logits, info = model( + ids, + L.ones_like(ids) * tgt_type_id, + pos_ids=pos_ids, + attn_bias=bias, + past_cache=past_cache) + + output, state = beam_search_step( + state, + logits[:, 1], + eos_id=eos_id, + beam_width=beam_width, + is_first_step=(step == 0), + length_penalty=length_penalty) + outputs.append(output) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [ + reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) + for pk, k in zip(past_cached_k, cached_k) + ] # concat cached + cached_v = [ + reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) + for pv, v in zip(past_cached_v, cached_v) + ] + past_cache = (cached_k, cached_v) + + pred_ids_flatten = L.reshape(output.predicted_ids, + [d_batch * beam_width]) + ids = L.stack([pred_ids_flatten, attn_ids], 1) + + if state.finished.numpy().all(): + break + + final_ids = L.stack([o.predicted_ids for o in outputs], 0) + final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) + final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, + #0] #pick best beam + final_ids = L.transpose( + L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) + return final_ids + + +en_patten = re.compile(r'^[a-zA-Z0-9]*$') + + +def post_process(token): + if token.startswith('##'): + ret = token[2:] + else: + if en_patten.match(token): + ret = ' ' + token + else: + ret = token + return ret diff --git a/modules/text/text_generation/ernie_gen_leave/model/file_utils.py b/modules/text/text_generation/ernie_gen_leave/model/file_utils.py new file mode 100644 index 00000000..613a5213 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_leave/model/file_utils.py @@ -0,0 +1,49 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +from tqdm import tqdm +from paddlehub.common.logger import logger +from paddlehub.common.dir import MODULE_HOME + + +def _fetch_from_remote(url, force_download=False): + import tempfile, requests, tarfile + cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") + if force_download or not os.path.exists(cached_dir): + with tempfile.NamedTemporaryFile() as f: + #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' + r = requests.get(url, stream=True) + total_len = int(r.headers.get('content-length')) + for chunk in tqdm( + r.iter_content(chunk_size=1024), + total=total_len // 1024, + desc='downloading %s' % url, + unit='KB'): + if chunk: + f.write(chunk) + f.flush() + logger.debug('extacting... 
to %s' % f.name) + with tarfile.open(f.name) as tf: + tf.extractall(path=cached_dir) + logger.debug('%s cached in %s' % (url, cached_dir)) + return cached_dir + + +def add_docstring(doc): + def func(f): + f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) + return f + + return func diff --git a/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie.py b/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie.py new file mode 100644 index 00000000..7c2304f6 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie.py @@ -0,0 +1,379 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +import paddle.fluid.dygraph as D +import paddle.fluid as F +import paddle.fluid.layers as L + +log = logging.getLogger(__name__) + + +def _build_linear(n_in, n_out, name, init, act=None): + return D.Linear( + n_in, + n_out, + param_attr=F.ParamAttr( + name='%s.w_0' % name if name is not None else None, + initializer=init), + bias_attr='%s.b_0' % name if name is not None else None, + act=act) + + +def _build_ln(n_in, name): + return D.LayerNorm( + normalized_shape=n_in, + param_attr=F.ParamAttr( + name='%s_layer_norm_scale' % name if name is not None else None, + initializer=F.initializer.Constant(1.)), + bias_attr=F.ParamAttr( + name='%s_layer_norm_bias' % name if name is not None else None, + initializer=F.initializer.Constant(1.)), + ) + + +def append_name(name, postfix): + if name is None: + return None + elif name == '': + return postfix + else: + return '%s_%s' % (name, postfix) + + +class AttentionLayer(D.Layer): + def __init__(self, cfg, name=None): + super(AttentionLayer, self).__init__() + initializer = F.initializer.TruncatedNormal( + scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + n_head = cfg['num_attention_heads'] + assert d_model % n_head == 0 + d_model_q = cfg.get('query_hidden_size_per_head', + d_model // n_head) * n_head + d_model_v = cfg.get('value_hidden_size_per_head', + d_model // n_head) * n_head + self.n_head = n_head + self.d_key = d_model_q // n_head + self.q = _build_linear(d_model, d_model_q, append_name( + name, 'query_fc'), initializer) + self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), + initializer) + self.v = _build_linear(d_model, d_model_v, append_name( + name, 'value_fc'), initializer) + self.o = _build_linear(d_model_v, d_model, append_name( + name, 'output_fc'), initializer) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=cfg['attention_probs_dropout_prob'], + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, queries, keys, values, attn_bias, past_cache): + assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3 + + q = self.q(queries) + k = self.k(keys) + v = 
self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = L.concat([cached_k, k], 1) + v = L.concat([cached_v, v], 1) + + q = L.transpose( + L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + k = L.transpose( + L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + v = L.transpose( + L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + + q = L.scale(q, scale=self.d_key**-0.5) + score = L.matmul(q, k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = L.softmax(score, use_cudnn=True) + score = self.dropout(score) + + out = L.matmul(score, v) + out = L.transpose(out, [0, 2, 1, 3]) + out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) + + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(D.Layer): + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = F.initializer.TruncatedNormal( + scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + assert cfg['hidden_act'] in ['relu', 'gelu'] + self.i = _build_linear( + d_model, + d_ffn, + append_name(name, 'fc_0'), + initializer, + act=cfg['hidden_act']) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), + initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs): + hidden = self.i(inputs) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + initializer = F.initializer.TruncatedNormal( + scale=cfg['initializer_range']) + + self.attn = AttentionLayer( + cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer( + cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn( + inputs, inputs, inputs, attn_bias, + past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = D.LayerList([ + ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) + for i in range(n_layers) + ]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, tuple + ), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr( + type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class ErnieModel(D.Layer): + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + D.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = F.initializer.TruncatedNormal( + scale=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = D.Embedding([d_vocab, d_emb], + param_attr=F.ParamAttr( + name=append_name( + name, 'word_embedding'), + initializer=initializer)) + self.pos_emb = D.Embedding([d_pos, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'pos_embedding'), + initializer=initializer)) + self.sent_emb = D.Embedding([d_sent, d_emb], + param_attr=F.ParamAttr( + name=append_name( + name, 'sent_embedding'), + initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + self.encoder_stack = ErnieEncoderStack(cfg, append_name( + name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], + cfg['hidden_size'], + append_name(name, 'pooled_fc'), + initializer, + act='tanh') + else: + self.pooler = None + self.train() + + def eval(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + + def train(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
+ attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + """ + assert len( + src_ids.shape + ) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr( + src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' + d_batch = L.shape(src_ids)[0] + d_seqlen = L.shape(src_ids)[1] + if pos_ids is None: + pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) + pos_ids = L.cast(pos_ids, 'int64') + if attn_bias is None: + if input_mask is None: + input_mask = L.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = L.unsqueeze(input_mask, axes=[-1]) + attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) + if use_causal_mask: + sequence = L.reshape( + L.range(0, d_seqlen, 1, dtype='float32') + 1., + [1, 1, -1, 1]) + causal_mask = L.cast( + (L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), + 'float32') + attn_bias *= causal_mask + else: + assert len( + attn_bias.shape + ) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + attn_bias = (1. - attn_bias) * -10000.0 + attn_bias = L.unsqueeze(attn_bias, [1]) + attn_bias = L.expand(attn_bias, + [1, self.n_head, 1, 1]) # avoid broadcast =_= + attn_bias.stop_gradient = True + + if sent_ids is None: + sent_ids = L.zeros_like(src_ids) + + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + sent_embedded = self.sent_emb(sent_ids) + embedded = src_embedded + pos_embedded + sent_embedded + + embedded = self.dropout(self.ln(embedded)) + + encoded, hidden_list, cache_list = self.encoder_stack( + embedded, attn_bias, past_cache=past_cache) + if self.pooler is not None: + pooled = self.pooler(encoded[:, 0, :]) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + else: + return pooled, encoded diff --git a/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie_gen.py b/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie_gen.py new file mode 100644 index 00000000..88f29c79 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie_gen.py @@ -0,0 +1,78 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
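Before the generation-specific head that follows, the attention-mask arithmetic in `ErnieModel.forward` above may be easier to follow outside the fluid API. The NumPy sketch below mirrors the same steps — padding mask, pairwise visibility via an outer product (the `matmul(mask, mask^T)` in `forward`), then `(1 - bias) * -10000` as an additive score offset. The token ids, shapes, and values are made up purely for illustration and are not part of the module.

```python
import numpy as np

# Toy batch: two sequences, the second padded with zeros (token id 0 == [PAD]).
src_ids = np.array([[101, 2769, 4263, 102],
                    [101, 2582,  102,   0]])

# 1. Padding mask: 1 for real tokens, 0 for padding.          shape [batch, seq_len]
input_mask = (src_ids != 0).astype('float32')

# 2. Pairwise visibility: position i may attend to j only if both are real tokens.
#    shape [batch, seq_len, seq_len]; equivalent to matmul(mask, mask^T) in forward().
attn_bias = np.einsum('bi,bj->bij', input_mask, input_mask)

# 3. Convert to an additive bias: visible pairs become 0, masked pairs -10000,
#    so they vanish after softmax. forward() then broadcasts this over the heads.
attn_bias = (1.0 - attn_bias) * -10000.0

print(attn_bias[1])   # the last row/column are -10000 because of the padded token
```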
+ +import paddle.fluid as F +import paddle.fluid.layers as L + +from .modeling_ernie import ErnieModel +from .modeling_ernie import _build_linear, _build_ln, append_name + + +class ErnieModelForGeneration(ErnieModel): + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = F.initializer.TruncatedNormal( + scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, + d_model, + append_name(name, 'mask_lm_trans_fc'), + initializer, + act=cfg['hidden_act']) + self.mlm_ln = _build_ln( + d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = L.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=F.ParamAttr( + name=append_name(name, 'mask_lm_out_fc.b_0'), + initializer=F.initializer.Constant(value=0.0)), + is_bias=True, + ) + + def forward(self, src_ids, *args, **kwargs): + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) + if encode_only: + return None, None, info + elif tgt_labels is None: + encoded = self.mlm(encoded) + encoded = self.mlm_ln(encoded) + logits = L.matmul( + encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = L.argmax(logits, -1) + return output_ids, logits, info + else: + encoded_2d = L.gather_nd(encoded, tgt_pos) + encoded_2d = self.mlm(encoded_2d) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = L.matmul( + encoded_2d, self.word_emb.weight, + transpose_y=True) + self.mlm_bias + if len(tgt_labels.shape) == 1: + tgt_labels = L.reshape(tgt_labels, [-1, 1]) + + loss = L.reduce_mean( + L.softmax_with_cross_entropy( + logits_2d, + tgt_labels, + soft_label=(tgt_labels.shape[-1] != 1))) + return loss, logits_2d, info diff --git a/modules/text/text_generation/ernie_gen_leave/model/tokenizing_ernie.py b/modules/text/text_generation/ernie_gen_leave/model/tokenizing_ernie.py new file mode 100644 index 00000000..3039b702 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_leave/model/tokenizing_ernie.py @@ -0,0 +1,171 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
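The head in `ErnieModelForGeneration` above projects hidden states back to the vocabulary by reusing `word_emb.weight` (weight tying) plus a per-token bias, instead of learning a separate `d_model x d_vocab` output matrix. The NumPy sketch below reproduces only that projection step; the toy sizes and random values are placeholders, and the `mlm` transform and layer norm are deliberately skipped.

```python
import numpy as np

d_model, d_vocab, seq_len = 8, 32, 5              # toy sizes, not the real config
rng = np.random.default_rng(0)

encoded  = rng.normal(size=(seq_len, d_model))    # encoder output for one sample
word_emb = rng.normal(size=(d_vocab, d_model))    # shared input/output embedding
mlm_bias = np.zeros(d_vocab)

# Tied projection, as in ErnieModelForGeneration.forward:
# logits = encoded @ word_emb.T + bias, then greedy argmax per position.
logits = encoded @ word_emb.T + mlm_bias          # [seq_len, d_vocab]
output_ids = logits.argmax(-1)                    # one token id per position
print(output_ids)
```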
+ +import six +import re +import logging +from functools import partial + +import numpy as np + +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError( + 'expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = { + pad_token, cls_token, sep_token, unk_token, mask_token + } | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece( + match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min( + half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max( + half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + 
pair_id_type = np.ones_like(pair_id, dtype=np.int64) + ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) + return ret_id, ret_id_type + + def encode(self, text, pair=None, truncate_to=None): + text_id = np.array( + self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) + text_id_type = np.zeros_like(text_id, dtype=np.int64) + if pair is not None: + pair_id = np.array( + self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) + else: + pair_id = [] + if truncate_to is not None: + text_id, pair_id = self.truncate( + text_id, [] if pair_id is None else pair_id, truncate_to) + + ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) + return ret_id, ret_id_type diff --git a/modules/text/text_generation/ernie_gen_leave/module.py b/modules/text/text_generation/ernie_gen_leave/module.py new file mode 100644 index 00000000..24aefe24 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_leave/module.py @@ -0,0 +1,177 @@ +# coding:utf-8 +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import json + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.compat.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + +import argparse +import os +import numpy as np + +import paddle.fluid.dygraph as D + +from .model.tokenizing_ernie import ErnieTokenizer +from .model.decode import beam_search_infilling +from .model.modeling_ernie_gen import ErnieModelForGeneration + + +@moduleinfo( + name="ernie_gen_leave", + version="1.0.0", + summary= + "", + author="彭兆帅,郑博培", + author_email="1084667371@qq.com,2733821739@qq.com", + type="nlp/text_generation", +) +class ErnieGen(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + assets_path = os.path.join(self.directory, "assets") + gen_checkpoint_path = os.path.join(assets_path, "ernie_gen") + ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json') + with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file: + ernie_cfg = dict(json.loads(ernie_cfg_file.read())) + ernie_vocab_path = os.path.join(assets_path, 'vocab.txt') + with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file: + ernie_vocab = { + j.strip().split('\t')[0]: i + for i, j in enumerate(ernie_vocab_file.readlines()) + } + + with fluid.dygraph.guard(fluid.CPUPlace()): + with fluid.unique_name.guard(): + self.model = ErnieModelForGeneration(ernie_cfg) + finetuned_states, _ = D.load_dygraph(gen_checkpoint_path) + self.model.set_dict(finetuned_states) + + self.tokenizer = ErnieTokenizer(ernie_vocab) + self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} + self.rev_dict[self.tokenizer.pad_id] = '' # replace [PAD] + self.rev_dict[self.tokenizer.unk_id] = '' # replace [PAD] + self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) + + @serving + def generate(self, 
texts, use_gpu=False, beam_width=5): + """ + Get the predict result from the input texts. + + Args: + texts(list): the input texts. + use_gpu(bool): whether use gpu to predict or not + beam_width(int): the beam search width. + + Returns: + results(list): the predict result. + """ + if texts and isinstance(texts, list) and all(texts) and all( + [isinstance(text, str) for text in texts]): + predicted_data = texts + else: + raise ValueError( + "The input texts should be a list with nonempty string elements." + ) + + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + if use_gpu: + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + + with fluid.dygraph.guard(place): + self.model.eval() + results = [] + for text in predicted_data: + sample_results = [] + ids, sids = self.tokenizer.encode(text) + src_ids = D.to_variable(np.expand_dims(ids, 0)) + src_sids = D.to_variable(np.expand_dims(sids, 0)) + output_ids = beam_search_infilling( + self.model, + src_ids, + src_sids, + eos_id=self.tokenizer.sep_id, + sos_id=self.tokenizer.cls_id, + attn_id=self.tokenizer.vocab['[MASK]'], + max_decode_len=50, + max_encode_len=50, + beam_width=beam_width, + tgt_type_id=1) + output_str = self.rev_lookup(output_ids[0].numpy()) + + for ostr in output_str.tolist(): + if '[SEP]' in ostr: + ostr = ostr[:ostr.index('[SEP]')] + sample_results.append("".join(ostr)) + results.append(sample_results) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--beam_width', type=int, default=5, help="the beam search width") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + results = self.generate( + texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width) + + return results diff --git a/modules/text/text_generation/ernie_gen_leave/test.py b/modules/text/text_generation/ernie_gen_leave/test.py new file mode 100644 index 00000000..e5b68140 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_leave/test.py @@ -0,0 +1,8 @@ +import paddlehub as hub + +module = hub.Module(name="ernie_gen_leave") + +test_texts = ["理由"] +results = module.generate(texts=test_texts, use_gpu=False, beam_width=2) +for result in results: + print(result) \ No newline at end of file -- GitLab
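Since `test.py` above only exercises `ernie_gen_leave`, a slightly broader smoke-test sketch covering both modules added in this patch is given below. It calls only the APIs defined here (`Rumor` and `generate`); the sample sentence and the printed fields are illustrative, and it assumes both modules are already installed locally.

```python
import paddlehub as hub

# Quick smoke test for the two modules added in this patch (illustrative only).
rumor_module = hub.Module(name="Rumor_prediction")
leave_module = hub.Module(name="ernie_gen_leave")

# Rumor_prediction: returns one dict per input text with
# 'content', 'prediction' and 'probability' keys (see Rumor() above).
rumor_results = rumor_module.Rumor(texts=["这是一条待检测的消息"], use_gpu=False)
for item in rumor_results:
    print(item["content"], "->", item["prediction"], item["probability"])

# ernie_gen_leave: one list of generated leave notes per keyword,
# at most beam_width candidates each (see generate() above).
leave_results = leave_module.generate(texts=["理由"], use_gpu=False, beam_width=2)
for candidates in leave_results:
    for note in candidates:
        print(note)
```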