提交 0e2d4eb0 编写于 作者: L Luo Tao

fix bug in text-code, remove readme.html

上级 0edac0ac
......@@ -53,6 +53,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
<!DOCTYPE html>
<html class="theme theme-white">
<head>
  <meta charset="utf-8">
  <!-- Render at device width on mobile instead of a scaled-down desktop layout. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>线性回归</title>
  <link href="https://www.zybuluo.com/static/assets/template-theme-white.css" rel="stylesheet" media="screen">
  <style>
    /* Render the top-level heading in the preview pane in blue. */
    #wmd-preview h1 {
      color: #0077bb;
    }
  </style>
</head>
<body class="theme theme-white">
<div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMATHI-6E" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-7B" stroke-width="1" d="M434 -231Q434 -244 428 -250H410Q281 -250 230 -184Q225 -177 222 -172T217 -161T213 -148T211 -133T210 -111T209 -84T209 -47T209 0Q209 21 209 53Q208 142 204 153Q203 154 203 155Q189 191 153 211T82 231Q71 231 68 234T65 250T68 266T82 269Q116 269 152 289T203 345Q208 356 208 377T209 529V579Q209 634 215 656T244 698Q270 724 324 740Q361 748 377 749Q379 749 390 749T408 750H428Q434 744 434 732Q434 719 431 716Q429 713 415 713Q362 710 332 689T296 647Q291 634 291 499V417Q291 370 288 353T271 314Q240 271 184 255L170 250L184 245Q202 239 220 230T262 196T290 137Q291 131 291 1Q291 -134 296 -147Q306 -174 339 -192T415 -213Q429 -213 431 -216Q434 -219 434 -231Z"></path><path id="MJMATHI-79" stroke-width="1" d="M21 287Q21 301 36 335T84 406T158 442Q199 442 224 419T250 355Q248 336 247 334Q247 331 231 288T198 191T182 105Q182 62 196 45T238 27Q261 27 281 38T312 61T339 94Q339 95 344 114T358 173T377 247Q415 397 419 404Q432 431 462 431Q475 431 483 424T494 412T496 403Q496 390 447 193T391 -23Q363 -106 294 -155T156 -205Q111 -205 77 
-183T43 -117Q43 -95 50 -80T69 -58T89 -48T106 -45Q150 -45 150 -87Q150 -107 138 -122T115 -142T102 -147L99 -148Q101 -153 118 -160T152 -167H160Q177 -167 186 -165Q219 -156 247 -127T290 -65T313 -9T321 21L315 17Q309 13 296 6T270 -6Q250 -11 231 -11Q185 -11 150 11T104 82Q103 89 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-2C" stroke-width="1" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path id="MJMATHI-78" stroke-width="1" d="M52 289Q59 331 106 386T222 442Q257 442 286 424T329 379Q371 442 430 442Q467 442 494 420T522 361Q522 332 508 314T481 292T458 288Q439 288 427 299T415 328Q415 374 465 391Q454 404 425 404Q412 404 406 402Q368 386 350 336Q290 115 290 78Q290 50 306 38T341 26Q378 26 414 59T463 140Q466 150 469 151T485 153H489Q504 153 504 145Q504 144 502 134Q486 77 440 33T333 -11Q263 -11 227 52Q186 -10 133 -10H127Q78 -10 57 16T35 71Q35 103 54 123T99 143Q142 143 142 101Q142 81 130 66T107 46T94 41L91 40Q91 39 97 36T113 29T132 26Q168 26 194 71Q203 87 217 139T245 247T261 313Q266 340 266 352Q266 380 251 392T217 404Q177 404 142 372T93 290Q91 281 88 280T72 278H58Q52 284 52 289Z"></path><path id="MJMAIN-31" 
stroke-width="1" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path id="MJMAIN-2E" stroke-width="1" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60Z"></path><path id="MJMATHI-64" stroke-width="1" d="M366 683Q367 683 438 688T511 694Q523 694 523 686Q523 679 450 384T375 83T374 68Q374 26 402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487H491Q506 153 506 145Q506 140 503 129Q490 79 473 48T445 8T417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157Q33 205 53 255T101 341Q148 398 195 420T280 442Q336 442 364 400Q369 394 369 396Q370 400 396 505T424 616Q424 629 417 632T378 637H357Q351 643 351 645T353 664Q358 683 366 683ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path id="MJMAIN-7D" stroke-width="1" d="M65 731Q65 745 68 747T88 750Q171 750 216 725T279 670Q288 649 289 635T291 501Q292 362 293 357Q306 312 345 291T417 269Q428 269 431 266T434 250T431 234T417 231Q380 231 345 210T298 157Q293 143 292 121T291 -28V-79Q291 -134 285 -156T256 -198Q202 -250 89 -250Q71 -250 68 -247T65 -230Q65 -224 65 -223T66 -218T69 -214T77 -213Q91 -213 108 -210T146 -200T183 -177T207 -139Q208 -134 209 3L210 139Q223 196 280 230Q315 247 330 250Q305 257 280 270Q225 304 212 352L210 362L209 498Q208 635 207 640Q195 680 154 696T77 713Q68 713 67 716T65 731Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path id="MJMAIN-2026" stroke-width="1" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 
0T96 17T78 60ZM525 60Q525 84 542 102T585 120Q609 120 627 104T646 61Q646 36 629 18T586 0T543 17T525 60ZM972 60Q972 84 989 102T1032 120Q1056 120 1074 104T1093 61Q1093 36 1076 18T1033 0T990 17T972 60Z"></path><path id="MJMATHI-3C9" stroke-width="1" d="M495 384Q495 406 514 424T555 443Q574 443 589 425T604 364Q604 334 592 278T555 155T483 38T377 -11Q297 -11 267 66Q266 68 260 61Q201 -11 125 -11Q15 -11 15 139Q15 230 56 325T123 434Q135 441 147 436Q160 429 160 418Q160 406 140 379T94 306T62 208Q61 202 61 187Q61 124 85 100T143 76Q201 76 245 129L253 137V156Q258 297 317 297Q348 297 348 261Q348 243 338 213T318 158L308 135Q309 133 310 129T318 115T334 97T358 83T393 76Q456 76 501 148T546 274Q546 305 533 325T508 357T495 384Z"></path><path id="MJMAIN-2B" stroke-width="1" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path id="MJMAIN-32" stroke-width="1" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path id="MJMATHI-62" stroke-width="1" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path 
id="MJMATHI-6A" stroke-width="1" d="M297 596Q297 627 318 644T361 661Q378 661 389 651T403 623Q403 595 384 576T340 557Q322 557 310 567T297 596ZM288 376Q288 405 262 405Q240 405 220 393T185 362T161 325T144 293L137 279Q135 278 121 278H107Q101 284 101 286T105 299Q126 348 164 391T252 441Q253 441 260 441T272 442Q296 441 316 432Q341 418 354 401T367 348V332L318 133Q267 -67 264 -75Q246 -125 194 -164T75 -204Q25 -204 7 -183T-12 -137Q-12 -110 7 -91T53 -71Q70 -71 82 -81T95 -112Q95 -148 63 -167Q69 -168 77 -168Q111 -168 139 -140T182 -74L193 -32Q204 11 219 72T251 197T278 308T289 365Q289 372 288 376Z"></path><path id="MJMATHI-59" stroke-width="1" d="M66 637Q54 637 49 637T39 638T32 641T30 647T33 664T42 682Q44 683 56 683Q104 680 165 680Q288 680 306 683H316Q322 677 322 674T320 656Q316 643 310 637H298Q242 637 242 624Q242 619 292 477T343 333L346 336Q350 340 358 349T379 373T411 410T454 461Q546 568 561 587T577 618Q577 634 545 637Q528 637 528 647Q528 649 530 661Q533 676 535 679T549 683Q551 683 578 682T657 680Q684 680 713 681T746 682Q763 682 763 673Q763 669 760 657T755 643Q753 637 734 637Q662 632 617 587Q608 578 477 424L348 273L322 169Q295 62 295 57Q295 46 363 46Q379 46 384 45T390 35Q390 33 388 23Q384 6 382 4T366 1Q361 1 324 1T232 2Q170 2 138 2T102 1Q84 1 84 9Q84 14 87 24Q88 27 89 30T90 35T91 39T93 42T96 44T101 45T107 45T116 46T129 46Q168 47 180 50T198 63Q201 68 227 171L252 274L129 623Q128 624 127 625T125 627T122 629T118 631T113 633T105 634T96 635T83 636T66 637Z"></path><path id="MJMAIN-5E" stroke-width="1" d="M112 560L249 694L257 686Q387 562 387 560L361 531Q359 532 303 581L250 627L195 580Q182 569 169 557T148 538L140 532Q138 530 125 546L112 560Z"></path><path id="MJMATHI-58" stroke-width="1" d="M42 0H40Q26 0 26 11Q26 15 29 27Q33 41 36 43T55 46Q141 49 190 98Q200 108 306 224T411 342Q302 620 297 625Q288 636 234 637H206Q200 643 200 645T202 664Q206 677 212 683H226Q260 681 347 681Q380 681 408 681T453 682T473 682Q490 682 490 671Q490 670 488 658Q484 643 481 640T465 637Q434 634 411 620L488 426L541 
485Q646 598 646 610Q646 628 622 635Q617 635 609 637Q594 637 594 648Q594 650 596 664Q600 677 606 683H618Q619 683 643 683T697 681T738 680Q828 680 837 683H845Q852 676 852 672Q850 647 840 637H824Q790 636 763 628T722 611T698 593L687 584Q687 585 592 480L505 384Q505 383 536 304T601 142T638 56Q648 47 699 46Q734 46 734 37Q734 35 732 23Q728 7 725 4T711 1Q708 1 678 1T589 2Q528 2 496 2T461 1Q444 1 444 10Q444 11 446 25Q448 35 450 39T455 44T464 46T480 47T506 54Q523 62 523 64Q522 64 476 181L429 299Q241 95 236 84Q232 76 232 72Q232 53 261 47Q262 47 267 47T273 46Q276 46 277 46T280 45T283 42T284 35Q284 26 282 19Q279 6 276 4T261 1Q258 1 243 1T201 2T142 2Q64 2 42 0Z"></path><path id="MJMAIN-33" stroke-width="1" d="M127 463Q100 463 85 480T69 524Q69 579 117 622T233 665Q268 665 277 664Q351 652 390 611T430 522Q430 470 396 421T302 350L299 348Q299 347 308 345T337 336T375 315Q457 262 457 175Q457 96 395 37T238 -22Q158 -22 100 21T42 130Q42 158 60 175T105 193Q133 193 151 175T169 130Q169 119 166 110T159 94T148 82T136 74T126 70T118 67L114 66Q165 21 238 21Q293 21 321 74Q338 107 338 175V195Q338 290 274 322Q259 328 213 329L171 330L168 332Q166 335 166 348Q166 366 174 366Q202 366 232 371Q266 376 294 413T322 525V533Q322 590 287 612Q265 626 240 626Q208 626 181 615T143 592T132 580H135Q138 579 143 578T153 573T165 566T175 555T183 540T186 520Q186 498 172 481T127 463Z"></path><path id="MJMATHI-4D" stroke-width="1" d="M289 629Q289 635 232 637Q208 637 201 638T194 648Q194 649 196 659Q197 662 198 666T199 671T201 676T203 679T207 681T212 683T220 683T232 684Q238 684 262 684T307 683Q386 683 398 683T414 678Q415 674 451 396L487 117L510 154Q534 190 574 254T662 394Q837 673 839 675Q840 676 842 678T846 681L852 683H948Q965 683 988 683T1017 684Q1051 684 1051 673Q1051 668 1048 656T1045 643Q1041 637 1008 637Q968 636 957 634T939 623Q936 618 867 340T797 59Q797 55 798 54T805 50T822 48T855 46H886Q892 37 892 35Q892 19 885 5Q880 0 869 0Q864 0 828 1T736 2Q675 2 644 2T609 1Q592 1 592 11Q592 13 594 25Q598 41 602 43T625 46Q652 46 685 
49Q699 52 704 61Q706 65 742 207T813 490T848 631L654 322Q458 10 453 5Q451 4 449 3Q444 0 433 0Q418 0 415 7Q413 11 374 317L335 624L267 354Q200 88 200 79Q206 46 272 46H282Q288 41 289 37T286 19Q282 3 278 1Q274 0 267 0Q265 0 255 0T221 1T157 2Q127 2 95 1T58 0Q43 0 39 2T35 11Q35 13 38 25T43 40Q45 46 65 46Q135 46 154 86Q158 92 223 354T289 629Z"></path><path id="MJMATHI-53" stroke-width="1" d="M308 24Q367 24 416 76T466 197Q466 260 414 284Q308 311 278 321T236 341Q176 383 176 462Q176 523 208 573T273 648Q302 673 343 688T407 704H418H425Q521 704 564 640Q565 640 577 653T603 682T623 704Q624 704 627 704T632 705Q645 705 645 698T617 577T585 459T569 456Q549 456 549 465Q549 471 550 475Q550 478 551 494T553 520Q553 554 544 579T526 616T501 641Q465 662 419 662Q362 662 313 616T263 510Q263 480 278 458T319 427Q323 425 389 408T456 390Q490 379 522 342T554 242Q554 216 546 186Q541 164 528 137T492 78T426 18T332 -20Q320 -22 298 -22Q199 -22 144 33L134 44L106 13Q83 -14 78 -18T65 -22Q52 -22 52 -14Q52 -11 110 221Q112 227 130 227H143Q149 221 149 216Q149 214 148 207T144 186T142 153Q144 114 160 87T203 47T255 29T308 24Z"></path><path id="MJMATHI-45" stroke-width="1" d="M492 213Q472 213 472 226Q472 230 477 250T482 285Q482 316 461 323T364 330H312Q311 328 277 192T243 52Q243 48 254 48T334 46Q428 46 458 48T518 61Q567 77 599 117T670 248Q680 270 683 272Q690 274 698 274Q718 274 718 261Q613 7 608 2Q605 0 322 0H133Q31 0 31 11Q31 13 34 25Q38 41 42 43T65 46Q92 46 125 49Q139 52 144 61Q146 66 215 342T285 622Q285 629 281 629Q273 632 228 634H197Q191 640 191 642T193 659Q197 676 203 680H757Q764 676 764 669Q764 664 751 557T737 447Q735 440 717 440H705Q698 445 698 453L701 476Q704 500 704 528Q704 558 697 578T678 609T643 625T596 632T532 634H485Q397 633 392 631Q388 629 386 622Q385 619 355 499T324 377Q347 376 372 376H398Q464 376 489 391T534 472Q538 488 540 490T557 493Q562 493 565 493T570 492T572 491T574 487T577 483L544 351Q511 218 508 216Q505 213 492 213Z"></path><path id="MJSZ2-2211" stroke-width="1" d="M60 948Q63 950 665 
950H1267L1325 815Q1384 677 1388 669H1348L1341 683Q1320 724 1285 761Q1235 809 1174 838T1033 881T882 898T699 902H574H543H251L259 891Q722 258 724 252Q725 250 724 246Q721 243 460 -56L196 -356Q196 -357 407 -357Q459 -357 548 -357T676 -358Q812 -358 896 -353T1063 -332T1204 -283T1307 -196Q1328 -170 1348 -124H1388Q1388 -125 1381 -145T1356 -210T1325 -294L1267 -449L666 -450Q64 -450 61 -448Q55 -446 55 -439Q55 -437 57 -433L590 177Q590 178 557 222T452 366T322 544L56 909L55 924Q55 945 60 948Z"></path><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMAIN-2212" stroke-width="1" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMAIN-38" stroke-width="1" d="M70 417T70 494T124 618T248 666Q319 666 374 624T429 515Q429 485 418 459T392 417T361 389T335 371T324 363L338 354Q352 344 366 334T382 323Q457 264 457 174Q457 95 399 37T249 -22Q159 -22 101 29T43 155Q43 263 172 335L154 348Q133 361 127 368Q70 417 70 494ZM286 386L292 390Q298 394 301 396T311 403T323 413T334 425T345 438T355 454T364 471T369 491T371 513Q371 556 342 586T275 624Q268 625 242 625Q201 625 165 599T128 534Q128 511 141 492T167 463T217 431Q224 426 228 424L286 386ZM250 21Q308 21 350 55T392 137Q392 154 387 169T375 194T353 216T330 234T301 253T274 270Q260 279 244 289T218 306L210 311Q204 311 181 294T133 239T107 157Q107 98 150 60T250 21Z"></path><path id="MJMAIN-3A" stroke-width="1" d="M78 370Q78 394 95 412T138 430Q162 430 180 414T199 371Q199 346 182 
328T139 310T96 327T78 370ZM78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60Z"></path></defs></svg></div><div id="wmd-preview" class="wmd-preview wmd-preview-full-reader"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="oxpz" id="线性回归">线性回归</h1><p data-anchor-id="5xhj">让我们从经典的线性回归(Linear Regression [<a href="#参考文献">1</a>])模型开始这份教程。在这一章里,你将使用真实的数据集建立起一个房价预测模型,并且了解到机器学习中的若干重要概念。</p><div class="md-section-divider"></div><h2 data-anchor-id="9ijb" id="背景介绍">背景介绍</h2><p data-anchor-id="z636">给定一个大小为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-59-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-59">n</script>的数据集 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-60-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -811.9875111244761 8313.188488672427 1126.8798169060667" style="width: 19.344ex; height: 2.664ex; vertical-align: -0.811ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7B"></use><g transform="translate(500,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1335" y="0"></use><g transform="translate(1780,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="345" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3051" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3496" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3941" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4386" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4831" y="0"></use><g transform="translate(5277,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="345" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7D" x="6563" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="9990" y="675"></use><g transform="translate(7064,-287)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1124" 
y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-60">{\{y_{i}, x_{i1}, ..., x_{id}\}}_{i=1}^{n}</script>,其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-61-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 4787.188129734929 679.103370696049" style="width: 11.12ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="345" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1270" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="1715" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3055" y="0"></use><g transform="translate(3500,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="345" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-61">x_{i1}, \ldots, x_{id}</script>是第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-62-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 
714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-62">i</script>个样本<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-63-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 523.5 746.103370696049" style="width: 1.158ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64"></use></g></svg></span><script type="math/tex" id="MathJax-Element-63">d</script>个属性上的取值,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-64-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 834.8053928999522 690.103370696049" style="width: 1.969ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-64">y_i</script>是该样本待预测的目标。线性回归模型假设目标<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-65-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 834.8053928999522 
690.103370696049" style="width: 1.969ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-65">y_i</script>可以被属性间的线性组合描述,即</p><div class="md-section-divider"></div><p data-anchor-id="76r1"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-66-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 21191.74570299315 941.103370696049" style="width: 49.228ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1112" y="0"></use><g transform="translate(2168,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="880" y="-213"></use></g><g transform="translate(3245,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-31" x="345" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="4738" y="0"></use><g transform="translate(5738,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="880" y="-213"></use></g><g transform="translate(6815,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="345" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="8308" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="9308" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="10703" y="0"></use><g transform="translate(11704,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="880" y="-213"></use></g><g transform="translate(12797,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="345" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="14306" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="15307" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" 
x="15736" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="16181" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="16804" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="17861" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="18361" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="18806" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="20146" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="20591" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-66">y_i = \omega_1x_{i1} + \omega_2x_{i2} + \ldots + \omega_dx_{id} + b, i=1,\ldots,n</script></p><p data-anchor-id="xieq">例如,在我们将要建模的房价预测问题里,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-67-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 1208.486940139403 778.0602608392912" style="width: 2.78ex; height: 1.853ex; vertical-align: -0.811ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="345" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-67">x_{ij}</script>是描述房子<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-68-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-68">i</script>的各种属性(比如房间的个数、周围学校和医院的个数、交通状况等),而 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-69-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 834.8053928999522 690.103370696049" style="width: 1.969ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-69">y_i</script>是房屋的价格。</p><p data-anchor-id="xdaz">初看起来,这个假设实在过于简单了,变量间的真实关系很难是线性的。但由于线性回归模型有形式简单和易于建模分析的优点,它在实际问题中得到了大量的应用。很多经典的统计学习、机器学习书籍[<a href="#参考文献">2,3,4</a>]也选择对线性模型独立成章重点讲解。</p><div class="md-section-divider"></div><h2 data-anchor-id="a18j" id="效果展示">效果展示</h2><p data-anchor-id="f50x">我们使用从<a href="https://archive.ics.uci.edu/ml/datasets/Housing" target="_blank">UCI Housing Data Set</a>获得的波士顿房价数据集进行模型的训练和预测。下面的散点图展示了使用模型对部分房屋价格进行的预测。其中,每个点的横坐标表示同一类房屋真实价格的中位数,纵坐标表示线性回归模型根据特征预测的结果,当二者值完全相等的时候就会落在虚线上。所以模型预测得越准确,则点离虚线越近。</p><p align="center" data-anchor-id="4jfr">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/fit_a_line/image/predictions.png" alt="房价预测值与真实值的散点图" width="400"><br>
图1. 预测值 V.S. 真实值
</p><div class="md-section-divider"></div><h2 data-anchor-id="739z" id="模型概览">模型概览</h2><div class="md-section-divider"></div><h3 data-anchor-id="tzb9" id="模型定义">模型定义</h3><p data-anchor-id="0i1k">在波士顿房价数据集中,和房屋相关的值共有14个:前13个用来描述房屋相关的各种信息,即模型中的 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-70-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 916.8053928999522 641.5886520702876" style="width: 2.085ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="809" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-70">x_i</script>;最后一个值为我们要预测的该类房屋价格的中位数,即模型中的 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-71-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 834.8053928999522 690.103370696049" style="width: 1.969ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-71">y_i</script>。因此,我们的模型就可以表示成:</p><div class="md-section-divider"></div><p data-anchor-id="94vk"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span 
class="MathJax_SVG" id="MathJax-Element-72-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -943.8583648847041 16375.58888520427 1130.1735062000191" style="width: 37.992ex; height: 2.664ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5E" x="249" y="228"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1041" y="0"></use><g transform="translate(2097,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="880" y="-213"></use></g><g transform="translate(3173,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1171" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="4678" y="0"></use><g transform="translate(5679,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="880" y="-213"></use></g><g transform="translate(6755,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1171" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="8260" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="9261" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" 
x="10655" y="0"></use><g transform="translate(11656,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><g transform="translate(622,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="500" y="0"></use></g></g><g transform="translate(13086,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><g transform="translate(828,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="500" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="14945" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="15946" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-72">\hat{Y} = \omega_1X_{1} + \omega_2X_{2} + \ldots + \omega_{13}X_{13} + b</script></p><p data-anchor-id="ch16"><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-73-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -943.8583648847041 763.5 963.9100502327285" style="width: 1.737ex; height: 2.201ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5E" x="249" y="228"></use></g></svg></span><script type="math/tex" id="MathJax-Element-73">\hat{Y}</script> 表示模型的预测结果,用来和真实值<span class="MathJax_Preview"></span><span class="MathJax_SVG" 
id="MathJax-Element-74-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 763.5 724.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use></g></svg></span><script type="math/tex" id="MathJax-Element-74">Y</script>区分。模型要学习的参数即:<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-75-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 5610.887498618268 931.103370696049" style="width: 13.089ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="880" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1076" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="1521" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2860" y="0"></use><g transform="translate(3305,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><g transform="translate(622,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="500" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4736" y="0"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="5181" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-75">\omega_1, \ldots, \omega_{13}, b</script></p><p data-anchor-id="9qug">建立模型后,我们需要给模型一个优化目标,使得学到的参数能够让预测值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-76-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -943.8583648847041 763.5 963.9100502327285" style="width: 1.737ex; height: 2.201ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5E" x="249" y="228"></use></g></svg></span><script type="math/tex" id="MathJax-Element-76">\hat{Y}</script>尽可能地接近真实值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-77-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 763.5 724.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use></g></svg></span><script type="math/tex" id="MathJax-Element-77">Y</script>。这里我们引入损失函数(<a href="https://en.wikipedia.org/wiki/Loss_function" target="_blank">Loss Function</a>,或Cost Function)这个概念。 输入任意一个数据样本的目标值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-78-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 834.8053928999522 690.103370696049" 
style="width: 1.969ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-78">y_{i}</script>和模型给出的预测值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-79-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.8583648847041 834.8053928999522 929.9100502327285" style="width: 1.969ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5E" x="167" y="-12"></use></g></svg></span><script type="math/tex" id="MathJax-Element-79">\hat{y_{i}}</script>,损失函数输出一个非负的实值。这个实值通常用来反映模型误差的大小。</p><p data-anchor-id="e3lc">对于线性回归模型来讲,最常见的损失函数就是均方误差(Mean Squared Error, <a href="https://en.wikipedia.org/wiki/Mean_squared_error" target="_blank">MSE</a>)了,它的形式是:</p><div class="md-section-divider"></div><p data-anchor-id="zib5"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-80-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1585.0070961948516 10841.351063117105 2802.850285968542" style="width: 25.135ex; height: 6.486ex; vertical-align: 
-2.896ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4D"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-53" x="1051" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-45" x="1697" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="2739" y="0"></use><g transform="translate(3915,0)"><rect stroke="none" width="720" height="60" x="0" y="220"></rect><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="110" y="676"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="60" y="-686"></use></g><g transform="translate(4922,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><g transform="translate(147,-1090)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1124" y="0"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="721" y="1627"></use></g><g transform="translate(6533,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28"></use><g transform="translate(389,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="822" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5E" x="212" y="228"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="1537" y="0"></use><g 
transform="translate(2538,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="822" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="3464" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="5449" y="920"></use></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-80">MSE=\frac{1}{n}\sum_{i=1}^{n}{(\hat{Y_i}-Y_i)}^2</script></p><p data-anchor-id="etto">即对于一个大小为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-81-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-81">n</script>的测试集,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-82-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 2461.5 769.103370696049" style="width: 5.676ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4D"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-53" x="1051" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-45" x="1697" y="0"></use></g></svg></span><script 
type="math/tex" id="MathJax-Element-82">MSE</script>是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-83-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-83">n</script>个数据预测结果误差平方的均值。</p><div class="md-section-divider"></div><h3 data-anchor-id="m9rr" id="训练过程">训练过程</h3><p data-anchor-id="hj08">定义好模型结构之后,我们要通过以下几个步骤进行模型训练 <br>
1. 初始化参数,其中包括权重<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-84-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 966.8053928999522 642.5886520702876" style="width: 2.201ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="880" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-84">\omega_i</script>和偏置<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-85-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 429.5 747.103370696049" style="width: 1.042ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use></g></svg></span><script type="math/tex" id="MathJax-Element-85">b</script>,对其进行初始化(如0均值,1方差)。 <br>
2. 网络正向传播计算网络输出和损失函数。 <br>
3. 根据损失函数进行反向误差传播 (<a href="https://en.wikipedia.org/wiki/Backpropagation" target="_blank">backpropagation</a>),将网络误差从输出层依次向前传递, 并更新网络中的参数。 <br>
4. 重复2~3步骤,直至网络训练误差达到规定的程度或训练轮次达到设定值。</p><div class="md-section-divider"></div><h2 data-anchor-id="wozw" id="数据准备">数据准备</h2><p data-anchor-id="ytgk">执行以下命令来准备数据:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="jh2a"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">cd data </span><span class="pun">&amp;&amp;</span><span class="pln"> python prepare_data</span><span class="pun">.</span><span class="pln">py</span></code></li></ol></pre><div class="md-section-divider"></div><div class="md-section-divider"></div><div class="md-section-divider"></div><p align="center" data-anchor-id="b8vw">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/fit_a_line/image/ranges.png" alt="各维属性的取值范围" width="550"><br>
图2. 各维属性的取值范围
</p><div class="md-section-divider"></div><h4 data-anchor-id="ye0u" id="整理训练集与测试集">整理训练集与测试集</h4><p data-anchor-id="m87z">我们将数据集分割为两份:一份用于调整模型的参数,即进行模型的训练,模型在这份数据集上的误差被称为<strong>训练误差</strong>;另外一份被用来测试,模型在这份数据集上的误差被称为<strong>测试误差</strong>。我们训练模型的目的是为了通过从训练数据中找到规律来预测未知的新数据,所以测试误差是更能反映模型表现的指标。分割数据的比例要考虑到两个因素:更多的训练数据会降低参数估计的方差,从而得到更可信的模型;而更多的测试数据会降低测试误差的方差,从而得到更可信的测试误差。一种常见的分割比例为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-86-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 1835.0555555555557 730.103370696049" style="width: 4.286ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-38"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="778" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1334" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-86">8:2</script>,感兴趣的读者朋友们也可以尝试不同的设置来观察这两种误差的变化。</p><p data-anchor-id="gqtf">执行如下命令可以分割数据集,并将训练集和测试集的地址分别写入train.list 和 test.list两个文件中,供PaddlePaddle读取。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="g3k3"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">python prepare_data</span><span class="pun">.</span><span class="pln">py </span><span class="pun">-</span><span class="pln">r </span><span class="lit">0.8</span><span class="pln"> </span><span class="com">#默认使用8:2的比例进行分割</span></code></li></ol></pre><p data-anchor-id="4qk8">在更复杂的模型训练过程中,我们往往还会多使用一种数据集:验证集。因为复杂的模型中常常还有一些超参数(<a href="https://en.wikipedia.org/wiki/Hyperparameter_optimization" 
target="_blank">Hyperparameter</a>)需要调节,所以我们会尝试多种超参数的组合来分别训练多个模型,然后对比它们在验证集上的表现选择相对最好的一组超参数,最后才使用这组参数下训练的模型在测试集上评估测试误差。由于本章训练的模型比较简单,我们暂且忽略掉这个过程。</p><div class="md-section-divider"></div><h3 data-anchor-id="qt61" id="提供数据给paddlepaddle">提供数据给PaddlePaddle</h3><p data-anchor-id="s85w">准备好数据之后,我们使用一个Python data provider来为PaddlePaddle的训练过程提供数据。一个 data provider 就是一个Python函数,它会被PaddlePaddle的训练过程调用。在这个例子里,只需要读取已经保存好的数据,然后一行一行地返回给PaddlePaddle的训练进程即可。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="yknv"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer</span><span class="pun">.</span><span class="typ">PyDataProvider2</span><span class="pln"> </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L1"><code class="language-python"><span class="kwd">import</span><span class="pln"> numpy </span><span class="kwd">as</span><span class="pln"> np</span></code></li><li class="L2"><code class="language-python"><span class="com">#定义数据的类型和维度</span></code></li><li class="L3"><code class="language-python"><span class="lit">@provider</span><span class="pun">(</span><span class="pln">input_types</span><span class="pun">=[</span><span class="pln">dense_vector</span><span class="pun">(</span><span class="lit">13</span><span class="pun">),</span><span class="pln"> dense_vector</span><span class="pun">(</span><span class="lit">1</span><span class="pun">)])</span></code></li><li class="L4"><code class="language-python"><span class="kwd">def</span><span class="pln"> process</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> input_file</span><span class="pun">):</span></code></li><li class="L5"><code class="language-python"><span class="pln"> data </span><span class="pun">=</span><span class="pln"> 
np</span><span class="pun">.</span><span class="pln">load</span><span class="pun">(</span><span class="pln">input_file</span><span class="pun">.</span><span class="pln">strip</span><span class="pun">())</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> row </span><span class="kwd">in</span><span class="pln"> data</span><span class="pun">:</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="kwd">yield</span><span class="pln"> row</span><span class="pun">[:-</span><span class="lit">1</span><span class="pun">].</span><span class="pln">tolist</span><span class="pun">(),</span><span class="pln"> row</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">:].</span><span class="pln">tolist</span><span class="pun">()</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="9xee" id="模型配置说明">模型配置说明</h2><div class="md-section-divider"></div><h3 data-anchor-id="jcsa" id="数据定义">数据定义</h3><p data-anchor-id="rq5t">首先,通过 <code>define_py_data_sources2</code> 来配置PaddlePaddle从上面的<code>dataprovider.py</code>里读入训练数据和测试数据。 PaddlePaddle接受从命令行读入的配置信息,例如这里我们传入一个名为<code>is_predict</code>的变量来控制模型在训练和测试时的不同结构。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="v6zd"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer_config_helpers </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="pln">is_predict </span><span class="pun">=</span><span class="pln"> get_config_arg</span><span class="pun">(</span><span class="str">'is_predict'</span><span class="pun">,</span><span 
class="pln"> bool</span><span class="pun">,</span><span class="pln"> </span><span class="kwd">False</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln">define_py_data_sources2</span><span class="pun">(</span></code></li><li class="L5"><code class="language-python"><span class="pln"> train_list</span><span class="pun">=</span><span class="str">'data/train.list'</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> test_list</span><span class="pun">=</span><span class="str">'data/test.list'</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> module</span><span class="pun">=</span><span class="str">'dataprovider'</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln"> obj</span><span class="pun">=</span><span class="str">'process'</span><span class="pun">)</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="0tzy" id="算法配置">算法配置</h3><p data-anchor-id="0n6j">接着,指定模型优化算法的细节。由于线性回归模型比较简单,我们只要设置基本的<code>batch_size</code>即可,它指定每次更新参数的时候使用多少条数据计算梯度信息。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="klfv"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">settings</span><span class="pun">(</span><span class="pln">batch_size</span><span class="pun">=</span><span class="lit">2</span><span class="pun">)</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="omgp" id="网络结构">网络结构</h3><p data-anchor-id="8a14">最后,使用<code>fc_layer</code>和<code>LinearActivation</code>来表示线性回归的模型本身。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="v0zt"><ol class="linenums"><li class="L0"><code class="language-python"><span 
class="com">#输入数据,13维的房屋信息</span></code></li><li class="L1"><code class="language-python"><span class="pln">x </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'x'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="lit">13</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">y_predict </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li class="L4"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">x</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="typ">ParamAttr</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'w'</span><span class="pun">),</span></code></li><li class="L6"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="lit">1</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> act</span><span class="pun">=</span><span class="typ">LinearActivation</span><span class="pun">(),</span></code></li><li class="L8"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="typ">ParamAttr</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'b'</span><span class="pun">))</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_predict</span><span 
class="pun">:</span><span class="pln"> </span><span class="com">#训练时,我们使用MSE,即regression_cost作为损失函数</span></code></li><li class="L1"><code class="language-python"><span class="pln"> y </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'y'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="lit">1</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="pln"> cost </span><span class="pun">=</span><span class="pln"> regression_cost</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">y_predict</span><span class="pun">,</span><span class="pln"> label</span><span class="pun">=</span><span class="pln">y</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">cost</span><span class="pun">)</span><span class="pln"> </span><span class="com">#训练时输出MSE来监控损失的变化</span></code></li><li class="L4"><code class="language-python"><span class="kwd">else</span><span class="pun">:</span><span class="pln"> </span><span class="com">#测试时,输出预测值</span></code></li><li class="L5"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">y_predict</span><span class="pun">)</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="ak2p" id="训练模型">训练模型</h2><p data-anchor-id="e18b">在对应代码的根目录下执行PaddlePaddle的命令行训练程序。这里指定模型配置文件为<code>trainer_config.py</code>,训练30轮,结果保存在<code>output</code>路径下。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="k7nw"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pun">./</span><span class="pln">train</span><span class="pun">.</span><span 
class="pln">sh</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="tjol" id="应用模型">应用模型</h2><p data-anchor-id="z7d8">现在来看下如何使用已经训练好的模型进行预测。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="ew76"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python predict</span><span class="pun">.</span><span class="pln">py</span></code></li></ol></pre><p data-anchor-id="1bsn">这里默认使用<code>output/pass-00029</code>中保存的模型进行预测,并将数据中的房价与预测结果进行对比,结果保存在 <code>predictions.png</code>中。 <br>
如果你想使用别的模型或者其它的数据进行预测,只要传入新的路径即可:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="9q38"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python predict</span><span class="pun">.</span><span class="pln">py </span><span class="pun">-</span><span class="pln">m output</span><span class="pun">/</span><span class="pln">pass</span><span class="pun">-</span><span class="lit">00020</span><span class="pln"> </span><span class="pun">-</span><span class="pln">t data</span><span class="pun">/</span><span class="pln">housing</span><span class="pun">.</span><span class="pln">test</span><span class="pun">.</span><span class="pln">npy</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="p7c3" id="总结">总结</h2><p data-anchor-id="3chd">在这章里,我们借助波士顿房价这一数据集,介绍了线性回归模型的基本概念,以及如何使用PaddlePaddle实现训练和测试的过程。很多的模型和技巧都是从简单的线性回归模型演化而来,因此弄清楚线性模型的原理和局限非常重要。</p><div class="md-section-divider"></div><h2 data-anchor-id="r5es" id="参考文献">参考文献</h2><ol data-anchor-id="2umn">
<li><a href="https://en.wikipedia.org/wiki/Linear_regression" target="_blank">https://en.wikipedia.org/wiki/Linear_regression</a></li>
<li>Friedman J, Hastie T, Tibshirani R. The elements of statistical learning[M]. Springer, Berlin: Springer series in statistics, 2001.</li>
<li>Murphy K P. Machine learning: a probabilistic perspective[M]. MIT press, 2012.</li>
<li>Bishop C M. Pattern recognition[J]. Machine Learning, 2006, 128.</li>
</ol><p data-anchor-id="x9ai"><br> <br>
<img src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="知识共享许可协议"> <br>
本教程由<a href="http://book.paddlepaddle.org" target="_blank">PaddlePaddle</a>创作,采用<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">知识共享 署名-非商业性使用-相同方式共享 4.0 国际 许可协议</a>进行许可。</p></div>
</body>
</html>
......@@ -241,6 +241,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
......@@ -53,6 +53,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
......@@ -52,6 +52,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
<!DOCTYPE html>
<html class="theme theme-white">
<head>
<meta charset="utf-8">
<title>图像分类</title>
<link href="https://www.zybuluo.com/static/assets/template-theme-white.css" rel="stylesheet" media="screen">
<style type="text/css">
#wmd-preview h1 {
color: #0077bb; /* 将标题改为蓝色 */
}</style>
</head>
<body class="theme theme-white">
<div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMATHI-61" stroke-width="1" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"></path><path id="MJMATHI-62" stroke-width="1" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path id="MJMATHI-6E" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 
405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-6C" stroke-width="1" d="M117 59Q117 26 142 26Q179 26 205 131Q211 151 215 152Q217 153 225 153H229Q238 153 241 153T246 151T248 144Q247 138 245 128T234 90T214 43T183 6T137 -11Q101 -11 70 11T38 85Q38 97 39 102L104 360Q167 615 167 623Q167 626 166 628T162 632T157 634T149 635T141 636T132 637T122 637Q112 637 109 637T101 638T95 641T94 647Q94 649 96 661Q101 680 107 682T179 688Q194 689 213 690T243 693T254 694Q266 694 266 686Q266 675 193 386T118 83Q118 81 118 75T117 65V59Z"></path><path id="MJMATHI-72" stroke-width="1" d="M21 287Q22 290 23 295T28 317T38 348T53 381T73 411T99 433T132 442Q161 442 183 430T214 408T225 388Q227 382 228 382T236 389Q284 441 347 441H350Q398 441 422 400Q430 381 430 363Q430 333 417 315T391 292T366 288Q346 288 334 299T322 328Q322 376 378 392Q356 405 342 405Q286 405 239 331Q229 315 224 298T190 165Q156 25 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 114 189T154 366Q154 405 128 405Q107 405 92 377T68 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-30" stroke-width="1" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path id="MJMAIN-2217" stroke-width="1" d="M229 286Q216 420 216 436Q216 454 240 464Q241 464 245 464T251 465Q263 464 273 456T283 436Q283 419 277 356T270 286L328 328Q384 369 389 372T399 375Q412 375 423 365T435 338Q435 325 425 315Q420 312 357 282T289 250L355 219L425 184Q434 175 434 161Q434 146 425 136T401 125Q393 125 383 131T328 171L270 213Q283 
79 283 63Q283 53 276 44T250 35Q231 35 224 44T216 63Q216 80 222 143T229 213L171 171Q115 130 110 127Q106 124 100 124Q87 124 76 134T64 161Q64 166 64 169T67 175T72 181T81 188T94 195T113 204T138 215T170 230T210 250L74 315Q65 324 65 338Q65 353 74 363T98 374Q106 374 116 368T171 328L229 286Z"></path><path id="MJMAIN-230A" stroke-width="1" d="M174 734Q174 735 175 737T177 740T180 744T184 747T189 749T196 750Q206 748 214 735V-210H310H373Q401 -210 411 -213T422 -230T411 -247T369 -251Q362 -251 338 -251T298 -250H190Q178 -246 174 -234V734Z"></path><path id="MJMAIN-230B" stroke-width="1" d="M229 734Q229 735 230 737T232 740T235 744T239 747T244 749T251 750Q262 748 269 735V-235Q266 -240 256 -249L147 -250H77Q43 -250 32 -247T21 -230T32 -213T72 -209Q79 -209 99 -209T133 -210H229V734Z"></path><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMATHI-64" stroke-width="1" d="M366 683Q367 683 438 688T511 694Q523 694 523 686Q523 679 450 384T375 83T374 68Q374 26 402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487H491Q506 153 506 145Q506 140 503 129Q490 79 473 48T445 8T417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157Q33 205 53 255T101 341Q148 398 195 420T280 442Q336 442 364 400Q369 394 369 396Q370 400 396 505T424 616Q424 629 417 632T378 637H357Q351 643 351 645T353 664Q358 683 366 683ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path id="MJMATHI-65" stroke-width="1" d="M39 168Q39 225 58 272T107 350T174 402T244 433T307 442H310Q355 442 388 420T421 355Q421 265 310 237Q261 224 176 223Q139 223 138 221Q138 219 132 186T125 128Q125 81 146 54T209 26T302 45T394 111Q403 
121 406 121Q410 121 419 112T429 98T420 82T390 55T344 24T281 -1T205 -11Q126 -11 83 42T39 168ZM373 353Q367 405 305 405Q272 405 244 391T199 357T170 316T154 280T149 261Q149 260 169 260Q282 260 327 284T373 353Z"></path><path id="MJMATHI-70" stroke-width="1" d="M23 287Q24 290 25 295T30 317T40 348T55 381T75 411T101 433T134 442Q209 442 230 378L240 387Q302 442 358 442Q423 442 460 395T497 281Q497 173 421 82T249 -10Q227 -10 210 -4Q199 1 187 11T168 28L161 36Q160 35 139 -51T118 -138Q118 -144 126 -145T163 -148H188Q194 -155 194 -157T191 -175Q188 -187 185 -190T172 -194Q170 -194 161 -194T127 -193T65 -192Q-5 -192 -24 -194H-32Q-39 -187 -39 -183Q-37 -156 -26 -148H-6Q28 -147 33 -136Q36 -130 94 103T155 350Q156 355 156 364Q156 405 131 405Q109 405 94 377T71 316T59 280Q57 278 43 278H29Q23 284 23 287ZM178 102Q200 26 252 26Q282 26 310 49T356 107Q374 141 392 215T411 325V331Q411 405 350 405Q339 405 328 402T306 393T286 380T269 365T254 350T243 336T235 326L232 322Q232 321 229 308T218 264T204 212Q178 106 178 102Z"></path><path id="MJMATHI-74" stroke-width="1" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path id="MJMATHI-68" stroke-width="1" d="M137 683Q138 683 209 688T282 694Q294 694 294 685Q294 674 258 534Q220 386 220 383Q220 381 227 388Q288 442 357 442Q411 442 444 415T478 336Q478 285 440 178T402 50Q403 36 407 31T422 26Q450 26 474 56T513 138Q516 149 519 151T535 153Q555 153 555 145Q555 144 551 130Q535 71 500 33Q466 -10 419 -10H414Q367 -10 346 17T325 74Q325 90 361 192T398 345Q398 404 354 404H349Q266 404 205 306L198 293L164 158Q132 28 127 16Q114 -11 83 
-11Q69 -11 59 -2T48 16Q48 30 121 320L195 616Q195 629 188 632T149 637H128Q122 643 122 645T124 664Q129 683 137 683Z"></path><path id="MJMAIN-2212" stroke-width="1" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path><path id="MJMAIN-32" stroke-width="1" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMATHI-63" stroke-width="1" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path id="MJMATHI-66" stroke-width="1" d="M118 -162Q120 -162 124 -164T135 -167T147 -168Q160 -168 171 -155T187 -126Q197 -99 221 27T267 267T289 382V385H242Q195 385 192 387Q188 390 188 397L195 425Q197 430 203 430T250 431Q298 431 298 432Q298 434 307 482T319 540Q356 705 465 705Q502 703 526 683T550 630Q550 594 529 578T487 561Q443 561 443 603Q443 622 454 636T478 657L487 662Q471 668 457 668Q445 668 434 658T419 630Q412 601 403 552T387 469T380 433Q380 431 435 431Q480 431 487 430T498 424Q499 420 496 407T491 391Q489 386 482 386T428 385H372L349 
263Q301 15 282 -47Q255 -132 212 -173Q175 -205 139 -205Q107 -205 81 -186T55 -132Q55 -95 76 -78T118 -61Q162 -61 162 -103Q162 -122 151 -136T127 -157L118 -162Z"></path><path id="MJMATHI-67" stroke-width="1" d="M311 43Q296 30 267 15T206 0Q143 0 105 45T66 160Q66 265 143 353T314 442Q361 442 401 394L404 398Q406 401 409 404T418 412T431 419T447 422Q461 422 470 413T480 394Q480 379 423 152T363 -80Q345 -134 286 -169T151 -205Q10 -205 10 -137Q10 -111 28 -91T74 -71Q89 -71 102 -80T116 -111Q116 -121 114 -130T107 -144T99 -154T92 -162L90 -164H91Q101 -167 151 -167Q189 -167 211 -155Q234 -144 254 -122T282 -75Q288 -56 298 -13Q311 35 311 43ZM384 328L380 339Q377 350 375 354T369 368T359 382T346 393T328 402T306 405Q262 405 221 352Q191 313 171 233T151 117Q151 38 213 38Q269 38 323 108L331 118L384 328Z"></path><path id="MJMAIN-20" stroke-width="1"></path><path id="MJMATHI-75" stroke-width="1" d="M21 287Q21 295 30 318T55 370T99 420T158 442Q204 442 227 417T250 358Q250 340 216 246T182 105Q182 62 196 45T238 27T291 44T328 78L339 95Q341 99 377 247Q407 367 413 387T427 416Q444 431 463 431Q480 431 488 421T496 402L420 84Q419 79 419 68Q419 43 426 35T447 26Q469 29 482 57T512 145Q514 153 532 153Q551 153 551 144Q550 139 549 130T540 98T523 55T498 17T462 -8Q454 -10 438 -10Q372 -10 347 46Q345 45 336 36T318 21T296 6T267 -6T233 -11Q189 -11 155 7Q103 38 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-73" stroke-width="1" d="M131 289Q131 321 147 354T203 415T300 442Q362 442 390 415T419 355Q419 323 402 308T364 292Q351 292 340 300T328 326Q328 342 337 354T354 372T367 378Q368 378 368 379Q368 382 361 388T336 399T297 405Q249 405 227 379T204 326Q204 301 223 291T278 274T330 259Q396 230 396 163Q396 135 385 107T352 51T289 7T195 -10Q118 -10 86 19T53 87Q53 126 74 143T118 160Q133 160 146 151T160 120Q160 94 142 76T111 58Q109 57 108 57T107 55Q108 52 115 47T146 34T201 27Q237 27 263 38T301 66T318 97T323 
122Q323 150 302 164T254 181T195 196T148 231Q131 256 131 289Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-6F" stroke-width="1" d="M201 -11Q126 -11 80 38T34 156Q34 221 64 279T146 380Q222 441 301 441Q333 441 341 440Q354 437 367 433T402 417T438 387T464 338T476 268Q476 161 390 75T201 -11ZM121 120Q121 70 147 48T206 26Q250 26 289 58T351 142Q360 163 374 216T388 308Q388 352 370 375Q346 405 306 405Q243 405 195 347Q158 303 140 230T121 120Z"></path><path id="MJMAIN-31" stroke-width="1" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path id="MJMATHI-6D" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T88 425T132 442T175 435T205 417T221 395T229 376L231 369Q231 367 232 367L243 378Q303 442 384 442Q401 442 415 440T441 433T460 423T475 411T485 398T493 385T497 373T500 364T502 357L510 367Q573 442 659 442Q713 442 746 415T780 336Q780 285 742 178T704 50Q705 36 709 31T724 26Q752 26 776 56T815 138Q818 149 821 151T837 153Q857 153 857 145Q857 144 853 130Q845 101 831 73T785 17T716 -10Q669 -10 648 17T627 73Q627 92 663 193T700 345Q700 404 656 404H651Q565 404 506 303L499 291L466 157Q433 26 428 16Q415 -11 385 -11Q372 -11 364 -4T353 8T350 18Q350 29 384 161L420 307Q423 322 423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 
26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 181Q151 335 151 342Q154 357 154 369Q154 405 129 405Q107 405 92 377T69 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-33" stroke-width="1" d="M127 463Q100 463 85 480T69 524Q69 579 117 622T233 665Q268 665 277 664Q351 652 390 611T430 522Q430 470 396 421T302 350L299 348Q299 347 308 345T337 336T375 315Q457 262 457 175Q457 96 395 37T238 -22Q158 -22 100 21T42 130Q42 158 60 175T105 193Q133 193 151 175T169 130Q169 119 166 110T159 94T148 82T136 74T126 70T118 67L114 66Q165 21 238 21Q293 21 321 74Q338 107 338 175V195Q338 290 274 322Q259 328 213 329L171 330L168 332Q166 335 166 348Q166 366 174 366Q202 366 232 371Q266 376 294 413T322 525V533Q322 590 287 612Q265 626 240 626Q208 626 181 615T143 592T132 580H135Q138 579 143 578T153 573T165 566T175 555T183 540T186 520Q186 498 172 481T127 463Z"></path><path id="MJMATHI-76" stroke-width="1" d="M173 380Q173 405 154 405Q130 405 104 376T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Q21 294 29 316T53 368T97 419T160 441Q202 441 225 417T249 361Q249 344 246 335Q246 329 231 291T200 202T182 113Q182 86 187 69Q200 26 250 26Q287 26 319 60T369 139T398 222T409 277Q409 300 401 317T383 343T365 361T357 383Q357 405 376 424T417 443Q436 443 451 425T467 367Q467 340 455 284T418 159T347 40T241 -11Q177 -11 139 22Q102 54 102 117Q102 148 110 181T151 298Q173 362 173 380Z"></path></defs></svg></div><div id="wmd-preview" class="wmd-preview wmd-preview-full-reader"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="hrbw" id="图像分类">图像分类</h1><div class="md-section-divider"></div><h2 data-anchor-id="u1iv" id="背景介绍">背景介绍</h2><p data-anchor-id="u5pj">图像相比文字能够提供更加生动、容易理解及更具艺术感的信息,是人们转递与交换信息的重要来源。在本教程中,我们专注于图像识别领域的一个重要问题,即图像分类。</p><p data-anchor-id="5qbf">图像分类是根据图像的语义信息将不同类别图像区分开来,是计算机视觉中重要的基本问题,也是图像检测、图像分割、物体跟踪、行为分析等其他高层视觉任务的基础。图像分类在很多领域有广泛应用,包括安防领域的人脸识别和智能视频分析等,交通领域的交通场景识别,互联网领域基于内容的图像检索和相册自动归类,医学领域的图像识别等。</p><p 
data-anchor-id="u2y1">一般来说,图像分类通过手工特征或特征学习方法对整个图像进行全部描述,然后使用分类器判别物体类别,因此如何提取图像的特征至关重要。在深度学习算法之前使用较多的是基于词袋(Bag of Words)模型的物体分类方法。词袋方法从自然语言处理中引入,即一句话可以用一个装了词的袋子表示其特征,袋子中的词为句子中的单词、短语或字。对于图像而言,词袋方法需要构建字典。最简单的词袋模型框架可以设计为<strong>底层特征抽取</strong>、<strong>特征编码</strong>、<strong>分类器设计</strong>三个过程。</p><p data-anchor-id="qj3h">而基于深度学习的图像分类方法,可以通过有监督或无监督的方式<strong>学习</strong>层次化的特征描述,从而取代了手工设计或选择图像特征的工作。深度学习模型中的卷积神经网络(Convolutional Neural Network, CNN)近年来在图像领域取得了惊人的成绩,CNN直接利用图像像素信息作为输入,最大程度上保留了输入图像的所有信息,通过卷积操作进行特征的提取和高层抽象,模型输出直接是图像识别的结果。这种基于"输入-输出"直接端到端的学习方法取得了非常好的效果,得到了广泛的应用。</p><p data-anchor-id="5q5x">本教程主要介绍图像分类的深度学习模型,以及如何使用PaddlePaddle训练CNN模型。</p><div class="md-section-divider"></div><h2 data-anchor-id="o34s" id="效果展示">效果展示</h2><p data-anchor-id="ko3q">图像分类包括通用图像分类、细粒度图像分类等。图1展示了通用图像分类效果,即模型可以正确识别图像上的主要物体。</p><p align="center" data-anchor-id="8dbj">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/dog_cat.png" width="350"><br>
图1. 通用图像分类展示
</p><p data-anchor-id="303b">图2展示了细粒度图像分类-花卉识别的效果,要求模型可以正确识别花的类别。</p><p align="center" data-anchor-id="rnwr">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/flowers.png" width="400"><br>
图2. 细粒度图像分类展示
</p><p data-anchor-id="r9d4">一个好的模型既要对不同类别识别正确,同时也应该能够对不同视角、光照、背景、变形或部分遮挡的图像正确识别(这里我们统一称作图像扰动)。图3展示了一些图像的扰动,较好的模型会像聪明的人类一样能够正确识别。</p><p align="center" data-anchor-id="ybfp">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/variations.png" width="550"><br>
图3. 扰动图片展示[22]
</p><div class="md-section-divider"></div><h2 data-anchor-id="xx5f" id="模型概览">模型概览</h2><p data-anchor-id="bc81">图像识别领域大量的研究成果都是建立在<a href="http://host.robots.ox.ac.uk/pascal/VOC/" target="_blank">PASCAL VOC</a>、<a href="http://image-net.org/" target="_blank">ImageNet</a>等公开的数据集上,很多图像识别算法通常在这些数据集上进行测试和比较。PASCAL VOC是2005年发起的一个视觉挑战赛,ImageNet是2010年发起的大规模视觉识别竞赛(ILSVRC)的数据集,在本章中我们基于这些竞赛的一些论文介绍图像分类模型。</p><p data-anchor-id="lndh">在2012年之前的传统图像分类方法可以用背景描述中提到的三步完成,但通常完整建立图像识别模型一般包括底层特征提取、特征编码、空间约束、分类器设计、模型融合等几个阶段。 <br>
1). <strong>底层特征提取</strong>: 通常从图像中按照固定步长、尺度提取大量局部特征描述。常用的局部特征包括SIFT(Scale-Invariant Feature Transform, 尺度不变特征转换) [<a href="#参考文献">1</a>]、HOG(Histogram of Oriented Gradient, 方向梯度直方图) [<a href="#参考文献">2</a>]、LBP(Local Binary Pattern, 局部二值模式) [<a href="#参考文献">3</a>] 等,一般也采用多种特征描述子,防止丢失过多的有用信息。 <br>
2). <strong>特征编码</strong>: 底层特征中包含了大量冗余与噪声,为了提高特征表达的鲁棒性,需要使用一种特征变换算法对底层特征进行编码,称作特征编码。常用的特征编码包括向量量化编码 [<a href="#参考文献">4</a>]、稀疏编码 [<a href="#参考文献">5</a>]、局部线性约束编码 [<a href="#参考文献">6</a>]、Fisher向量编码 [<a href="#参考文献">7</a>] 等。 <br>
3). <strong>空间特征约束</strong>: 特征编码之后一般会经过空间特征约束,也称作<strong>特征汇聚</strong>。特征汇聚是指在一个空间范围内,对每一维特征取最大值或者平均值,可以获得一定特征不变性的特征表达。金字塔特征匹配是一种常用的特征汇聚方法,这种方法提出将图像均匀分块,在分块内做特征汇聚。 <br>
4). <strong>通过分类器分类</strong>: 经过前面步骤之后一张图像可以用一个固定维度的向量进行描述,接下来就是经过分类器对图像进行分类。通常使用的分类器包括SVM(Support Vector Machine, 支持向量机)、随机森林等。而使用核方法的SVM是最为广泛的分类器,在传统图像分类任务上性能很好。</p><p data-anchor-id="iyvz">这种方法在PASCAL VOC竞赛中的图像分类算法中被广泛使用 [<a href="#参考文献">18</a>]。<a href="http://www.nec-labs.com/" target="_blank">NEC实验室</a>在ILSVRC2010中采用SIFT和LBP特征,两个非线性编码器以及SVM分类器获得图像分类的冠军 [<a href="#参考文献">8</a>]。</p><p data-anchor-id="delz">Alex Krizhevsky在2012年ILSVRC提出的CNN模型 [<a href="#参考文献">9</a>] 取得了历史性的突破,效果大幅度超越传统方法,获得了ILSVRC2012冠军,该模型被称作AlexNet。这也是首次将深度学习用于大规模图像分类中。从AlexNet之后,涌现了一系列CNN模型,不断地在ImageNet上刷新成绩,如图4展示。随着模型变得越来越深以及精妙的结构设计,Top-5的错误率也越来越低,降到了3.5%附近。而在同样的ImageNet数据集上,人眼的辨识错误率大概在5.1%,也就是目前的深度学习模型的识别能力已经超过了人眼。</p><p align="center" data-anchor-id="wr6w">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/ilsvrc.png" width="500"><br>
图4. ILSVRC图像分类Top-5错误率
</p><div class="md-section-divider"></div><h3 data-anchor-id="jppq" id="cnn">CNN</h3><p data-anchor-id="4ehp">传统CNN包含卷积层、全连接层等组件,并采用softmax多类别分类器和多类交叉熵损失函数,一个典型的卷积神经网络如图5所示,我们先介绍用来构造CNN的常见组件。</p><p align="center" data-anchor-id="p9hi">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/lenet.png"><br>
图5. CNN网络示例[20]
</p><ul data-anchor-id="jine">
<li>卷积层(convolution layer): 执行卷积操作提取底层到高层的特征,发掘出图片局部关联性质和空间不变性质。</li>
<li>池化层(pooling layer): 执行降采样操作。通过取卷积输出特征图中局部区块的最大值(max-pooling)或者均值(avg-pooling)。降采样也是图像处理中常见的一种操作,可以过滤掉一些不重要的高频信息。</li>
<li>全连接层(fully-connected layer,或者fc layer): 输入层到隐藏层的神经元是全部连接的。</li>
<li>非线性变化: 卷积层、全连接层后面一般都会接非线性变化层,例如Sigmoid、Tanh、ReLu等来增强网络的表达能力,在CNN里最常使用的为ReLu激活函数。</li>
<li>Dropout [<a href="#参考文献">10</a>] : 在模型训练阶段随机让一些隐层节点权重不工作,提高网络的泛化能力,一定程度上防止过拟合。</li>
</ul><p data-anchor-id="1342">另外,在训练过程中由于每层参数不断更新,会导致下一次输入分布发生变化,这样导致训练过程需要精心设计超参数。如2015年Sergey Ioffe和Christian Szegedy提出了Batch Normalization (BN)算法 [<a href="#参考文献">14</a>] 中,每个batch对网络中的每一层特征都做归一化,使得每层分布相对稳定。BN算法不仅起到一定的正则作用,而且弱化了一些超参数的设计。经过实验证明,BN算法加速了模型收敛过程,在后来较深的模型中被广泛使用。</p><p data-anchor-id="22w8">接下来我们主要介绍VGG,GoogleNet和ResNet网络结构。</p><div class="md-section-divider"></div><h3 data-anchor-id="bpai" id="vgg">VGG</h3><p data-anchor-id="2q5r">牛津大学VGG(Visual Geometry Group)组在2014年ILSVRC提出的模型被称作VGG模型 [<a href="#参考文献">11</a>] 。该模型相比以往模型进一步加宽和加深了网络结构,它的核心是五组卷积操作,每两组之间做Max-Pooling空间降维。同一组内采用多次连续的3X3卷积,卷积核的数目由较浅组的64增多到最深组的512,同一组内的卷积核数目是一样的。卷积之后接两层全连接层,之后是分类层。由于每组内卷积层的不同,有11、13、16、19层这几种模型,下图展示一个16层的网络结构。VGG模型结构相对简洁,提出之后也有很多文章基于此模型进行研究,如在ImageNet上首次公开超过人眼识别的模型[<a href="#参考文献">19</a>]就是借鉴VGG模型的结构。</p><p align="center" data-anchor-id="kc4k">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/vgg16.png" width="750"><br>
图6. 基于ImageNet的VGG16模型
</p><div class="md-section-divider"></div><h3 data-anchor-id="8lel" id="googlenet">GoogleNet</h3><p data-anchor-id="g6uy">GoogleNet [<a href="#参考文献">12</a>] 在2014年ILSVRC中获得了冠军,在介绍该模型之前我们先来了解NIN(Network in Network)模型 [<a href="#参考文献">13</a>] 和Inception模块,因为GoogleNet模型由多组Inception模块组成,模型设计借鉴了NIN的一些思想。</p><p data-anchor-id="r1b0">NIN模型主要有两个特点:1) 引入了多层感知卷积网络(Multi-Layer Perceptron Convolution, MLPconv)代替一层线性卷积网络。MLPconv是一个微小的多层卷积网络,即在线性卷积后面增加若干层1x1的卷积,这样可以提取出高度非线性特征。2) 传统的CNN最后几层一般都是全连接层,参数较多。而NIN模型设计最后一层卷积层包含类别维度大小的特征图,然后采用全局均值池化(Avg-Pooling)替代全连接层,得到类别维度大小的向量,再进行分类。这种替代全连接层的方式有利于减少参数。</p><p data-anchor-id="1r5c">Inception模块如下图7所示,图(a)是最简单的设计,输出是3个卷积层和一个池化层的特征拼接。这种设计的缺点是池化层不会改变特征通道数,拼接后会导致特征的通道数较大,经过几层这样的模块堆积后,通道数会越来越大,导致参数和计算量也随之增大。为了改善这个缺点,图(b)引入3个1x1卷积层进行降维,所谓的降维就是减少通道数,同时如NIN模型中提到的1x1卷积也可以修正线性特征。</p><p align="center" data-anchor-id="g0ha">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/inception.png" width="800"><br>
图7. Inception模块
</p><p data-anchor-id="hdi3">GoogleNet由多组Inception模块堆积而成。另外,在网络最后也没有采用传统的多层全连接层,而是像NIN网络一样采用了均值池化层;但与NIN不同的是,池化层后面接了一层到类别数映射的全连接层。除了这两个特点之外,由于网络中间层特征也很有判别性,GoogleNet在中间层添加了两个辅助分类器,在后向传播中增强梯度并且增强正则化,而整个网络的损失函数是这三个分类器的损失加权求和。</p><p data-anchor-id="3x7i">GoogleNet整体网络结构如图8所示,总共22层网络:开始由3层普通的卷积组成;接下来由三组子网络组成,第一组子网络包含2个Inception模块,第二组包含5个Inception模块,第三组包含2个Inception模块;然后接均值池化层、全连接层。</p><p align="center" data-anchor-id="krg6">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/googlenet.jpeg"><br>
图8. GoogleNet[12]
</p><p data-anchor-id="av04">上面介绍的是GoogleNet第一版模型(称作GoogleNet-v1)。GoogleNet-v2 [<a href="#参考文献">14</a>] 引入BN层;GoogleNet-v3 [<a href="#参考文献">16</a>] 对一些卷积层做了分解,进一步提高网络非线性能力和加深网络;GoogleNet-v4 [<a href="#参考文献">17</a>] 引入下面要讲的ResNet设计思路。从v1到v4每一版的改进都会带来准确度的提升,限于篇幅,这里不再详细介绍v2到v4的结构。</p><div class="md-section-divider"></div><h3 data-anchor-id="6qb7" id="resnet">ResNet</h3><p data-anchor-id="fl7g">ResNet(Residual Network) [<a href="#参考文献">15</a>] 是2015年ImageNet图像分类、图像物体定位和图像物体检测比赛的冠军。针对训练卷积神经网络时加深网络导致准确度下降的问题,ResNet提出了采用残差学习。在已有设计思路(BN, 小卷积核,全卷积网络)的基础上,引入了残差模块。每个残差模块包含两条路径,其中一条路径是输入特征的直连通路,另一条路径对该特征做两到三次卷积操作得到该特征的残差,最后再将两条路径上的特征相加。</p><p data-anchor-id="rvdb">残差模块如图9所示,左边是基本模块连接方式,由两个输出通道数相同的3x3卷积组成。右边是瓶颈模块(Bottleneck)连接方式,之所以称为瓶颈,是因为上面的1x1卷积用来降维(图示例即256-&gt;64),下面的1x1卷积用来升维(图示例即64-&gt;256),这样中间3x3卷积的输入和输出通道数都较小(图示例即64-&gt;64)。</p><p align="center" data-anchor-id="fec2">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/resnet_block.jpg" width="400"><br>
图9. 残差模块
</p><p data-anchor-id="txqs">图10展示了50、101、152层网络连接示意图,使用的是瓶颈模块。这三个模型的区别在于每组中残差模块的重复次数不同(见图右上角)。ResNet训练收敛较快,成功地训练了上百乃至近千层的卷积神经网络。</p><p align="center" data-anchor-id="mlgg">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/resnet.png"><br>
图10. 基于ImageNet的ResNet模型
</p><div class="md-section-divider"></div><h2 data-anchor-id="pny1" id="数据准备">数据准备</h2><div class="md-section-divider"></div><h3 data-anchor-id="vsax" id="数据介绍与下载">数据介绍与下载</h3><p data-anchor-id="aq4f">通用图像分类公开的标准数据集常用的有<a href="https://www.cs.toronto.edu/~kriz/cifar.html" target="_blank">CIFAR</a>、<a href="http://image-net.org/" target="_blank">ImageNet</a>、<a href="http://mscoco.org/" target="_blank">COCO</a>等,常用的细粒度图像分类数据集包括<a href="http://www.vision.caltech.edu/visipedia/CUB-200-2011.html" target="_blank">CUB-200-2011</a>、<a href="http://vision.stanford.edu/aditya86/ImageNetDogs/" target="_blank">Stanford Dog</a>、<a href="http://www.robots.ox.ac.uk/~vgg/data/flowers/" target="_blank">Oxford-flowers</a>等。其中ImageNet数据集规模相对较大,如<a href="#模型概览">模型概览</a>一章所讲,大量研究成果基于ImageNet。ImageNet数据从2010年来稍有变化,常用的是ImageNet-2012数据集,该数据集包含1000个类别:训练集包含1,281,167张图片,每个类别数据732至1300张不等,验证集包含50,000张图片,平均每个类别50张图片。</p><p data-anchor-id="28sa">由于ImageNet数据集较大,下载和训练较慢,为了方便大家学习,我们使用<a href="https://www.cs.toronto.edu/~kriz/cifar.html" target="_blank">CIFAR10</a>数据集。CIFAR10数据集包含60,000张32x32的彩色图片,10个类别,每个类包含6,000张。其中50,000张图片作为训练集,10,000张作为测试集。图11从每个类别中随机抽取了10张图片,展示了所有的类别。</p><p align="center" data-anchor-id="02tp">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/cifar.png" width="350"><br>
图11. CIFAR10数据集[21]
</p><p data-anchor-id="ustq">下面命令用于下载数据和基于训练集计算图像均值,在网络输入前,基于该均值对输入数据做预处理。</p><pre data-anchor-id="wtms"><code>./data/get_data.sh
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="ioul" id="数据提供给paddlepaddle">数据提供给PaddlePaddle</h3><p data-anchor-id="u280">我们使用Python接口传递数据给系统,下面 <code>dataprovider.py</code> 针对CIFAR10数据给出了完整示例。</p><ul data-anchor-id="ptpg">
<li><p><code>initializer</code> 函数进行dataprovider的初始化,这里加载图像的均值,定义了输入image和label两个字段的类型。</p></li>
<li><p><code>process</code> 函数将数据逐条传输给系统,在图像分类任务里,可以在该函数中完成数据扰动操作,再传输给PaddlePaddle。这里对训练集做随机左右翻转,并将原始图片减去均值后传输给系统。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">import</span><span class="pln"> numpy </span><span class="kwd">as</span><span class="pln"> np</span></code></li><li class="L1"><code class="language-python"><span class="kwd">import</span><span class="pln"> cPickle</span></code></li><li class="L2"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer</span><span class="pun">.</span><span class="typ">PyDataProvider2</span><span class="pln"> </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="kwd">def</span><span class="pln"> initializer</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> mean_path</span><span class="pun">,</span><span class="pln"> is_train</span><span class="pun">,</span><span class="pln"> </span><span class="pun">**</span><span class="pln">kwargs</span><span class="pun">):</span></code></li><li class="L5"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">is_train </span><span class="pun">=</span><span class="pln"> is_train</span></code></li><li class="L6"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">input_size </span><span class="pun">=</span><span class="pln"> </span><span class="lit">3</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> </span><span class="lit">32</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> </span><span class="lit">32</span></code></li><li class="L7"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">mean 
</span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">load</span><span class="pun">(</span><span class="pln">mean_path</span><span class="pun">)[</span><span class="str">'mean'</span><span class="pun">]</span></code></li><li class="L8"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">input_types </span><span class="pun">=</span><span class="pln"> </span><span class="pun">{</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="str">'image'</span><span class="pun">:</span><span class="pln"> dense_vector</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">input_size</span><span class="pun">),</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="str">'label'</span><span class="pun">:</span><span class="pln"> integer_value</span><span class="pun">(</span><span class="lit">10</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="pun">}</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="lit">@provider</span><span class="pun">(</span><span class="pln">init_hook</span><span class="pun">=</span><span class="pln">initializer</span><span class="pun">,</span><span class="pln"> cache</span><span class="pun">=</span><span class="typ">CacheType</span><span class="pun">.</span><span class="pln">CACHE_PASS_IN_MEM</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="kwd">def</span><span class="pln"> process</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> file_list</span><span 
class="pun">):</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">file_list</span><span class="pun">,</span><span class="pln"> </span><span class="str">'r'</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> fdata</span><span class="pun">:</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> fname </span><span class="kwd">in</span><span class="pln"> fdata</span><span class="pun">:</span></code></li><li class="L8"><code class="language-python"><span class="pln"> fo </span><span class="pun">=</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">fname</span><span class="pun">.</span><span class="pln">strip</span><span class="pun">(),</span><span class="pln"> </span><span class="str">'rb'</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln"> batch </span><span class="pun">=</span><span class="pln"> cPickle</span><span class="pun">.</span><span class="pln">load</span><span class="pun">(</span><span class="pln">fo</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln"> fo</span><span class="pun">.</span><span class="pln">close</span><span class="pun">()</span></code></li><li class="L1"><code class="language-python"><span class="pln"> images </span><span class="pun">=</span><span class="pln"> batch</span><span class="pun">[</span><span class="str">'data'</span><span class="pun">]</span></code></li><li class="L2"><code class="language-python"><span class="pln"> labels </span><span class="pun">=</span><span class="pln"> batch</span><span class="pun">[</span><span class="str">'labels'</span><span class="pun">]</span></code></li><li class="L3"><code 
class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> im</span><span class="pun">,</span><span class="pln"> lab </span><span class="kwd">in</span><span class="pln"> zip</span><span class="pun">(</span><span class="pln">images</span><span class="pun">,</span><span class="pln"> labels</span><span class="pun">):</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="kwd">if</span><span class="pln"> settings</span><span class="pun">.</span><span class="pln">is_train </span><span class="kwd">and</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">random</span><span class="pun">.</span><span class="pln">randint</span><span class="pun">(</span><span class="lit">2</span><span class="pun">):</span></code></li><li class="L5"><code class="language-python"><span class="pln"> im </span><span class="pun">=</span><span class="pln"> im</span><span class="pun">[:,:,::-</span><span class="lit">1</span><span class="pun">]</span></code></li><li class="L6"><code class="language-python"><span class="pln"> im </span><span class="pun">=</span><span class="pln"> im </span><span class="pun">-</span><span class="pln"> settings</span><span class="pun">.</span><span class="pln">mean</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="kwd">yield</span><span class="pln"> </span><span class="pun">{</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="str">'image'</span><span class="pun">:</span><span class="pln"> im</span><span class="pun">.</span><span class="pln">astype</span><span class="pun">(</span><span class="str">'float32'</span><span class="pun">),</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="str">'label'</span><span class="pun">:</span><span class="pln"> int</span><span class="pun">(</span><span 
class="pln">lab</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="pun">}</span></code></li></ol></pre></li>
</ul><div class="md-section-divider"></div><h2 data-anchor-id="oc08" id="模型配置说明">模型配置说明</h2><div class="md-section-divider"></div><h3 data-anchor-id="45ek" id="数据定义">数据定义</h3><p data-anchor-id="zs50">在模型配置中,定义通过 <code>define_py_data_sources2</code> 函数从 dataprovider 中读入数据, 其中 args 指定均值文件的路径。如果该配置文件用于预测,则不需要数据定义部分。</p><pre data-anchor-id="gg54"><code>from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
if not is_predict:
    define_py_data_sources2(
        train_list='data/train.list',
        test_list='data/test.list',
        module='dataprovider',
        obj='process',
        args={'mean_path': 'data/mean.meta'})
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="rwn5" id="算法配置">算法配置</h3><p data-anchor-id="01cy">在模型配置中,通过 <code>settings</code> 设置训练使用的优化算法,并指定batch size 、初始学习率、momentum以及L2正则。</p><pre data-anchor-id="20mk"><code>settings(
batch_size=128,
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
learning_method=MomentumOptimizer(0.9),
regularization=L2Regularization(0.0005 * 128),)
</code></pre><p data-anchor-id="tmhm">通过 <code>learning_rate_decay_a</code> (简写<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-1-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -462.0516853480245 529.5 493.10337069604896" style="width: 1.274ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61"></use></g></svg></span><script type="math/tex" id="MathJax-Element-1">a</script>) 、<code>learning_rate_decay_b</code> (简写<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-2-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 429.5 747.103370696049" style="width: 1.042ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use></g></svg></span><script type="math/tex" id="MathJax-Element-2">b</script>) 和 <code>learning_rate_schedule</code> 指定学习率调整策略,这里采用离散指数的方式调节学习率,计算公式如下, <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-3-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-3">n</script> 
代表已经处理过的累计总样本数,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-4-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -716.0516853480245 1203.906943983867 902.3668266633396" style="width: 2.78ex; height: 2.085ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C"></use><g transform="translate(298,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="638" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-4">lr_{0}</script> 即为 <code>settings</code> 里设置的 <code>learning_rate</code></p><div class="md-section-divider"></div><p data-anchor-id="6aml"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-5-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1016.2068841897235 6151.274872458707 1202.5220255050385" style="width: 14.247ex; height: 2.78ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="298" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1027" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="2084" y="0"></use><g transform="translate(2382,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72"></use><use 
transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="638" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2217" x="3510" y="0"></use><g transform="translate(4232,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61"></use><g transform="translate(529,437)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-230A"></use><g transform="translate(434,0)"><rect stroke="none" width="420" height="60" x="0" y="146"></rect><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="119" y="672"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="205" y="-649"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-230B" x="1378" y="0"></use></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-5"> lr = lr_{0} * a^ {\lfloor \frac{n}{ b}\rfloor} </script></p><div class="md-section-divider"></div><h3 data-anchor-id="gykl" id="模型结构">模型结构</h3><p data-anchor-id="jvuq">本教程中我们提供了VGG和ResNet两个模型的配置。</p><div class="md-section-divider"></div><h4 data-anchor-id="lptu" id="vgg-1">VGG</h4><p data-anchor-id="bvtv">首先介绍VGG模型结构,由于CIFAR10图片大小和数量相比ImageNet数据小很多,因此这里的模型针对CIFAR10数据做了一定的适配。卷积部分引入了BN和Dropout操作。</p><ol data-anchor-id="elv2">
<li><p>定义数据输入及其维度</p>
<p>网络输入定义为 <code>data_layer</code> (数据层),在图像分类中即为图像像素信息。CIFAR10是RGB 3通道32x32大小的彩色图,因此输入数据大小为3072(3x32x32),类别大小为10,即10分类。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln">datadim </span><span class="pun">=</span><span class="pln"> </span><span class="lit">3</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> </span><span class="lit">32</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> </span><span class="lit">32</span></code></li><li class="L2"><code class="language-python"><span class="pln">classdim </span><span class="pun">=</span><span class="pln"> </span><span class="lit">10</span></code></li><li class="L3"><code class="language-python"><span class="pln">data </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'image'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">datadim</span><span class="pun">)</span></code></li></ol></pre></li>
<li><p>定义VGG网络核心模块</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">net </span><span class="pun">=</span><span class="pln"> vgg_bn_drop</span><span class="pun">(</span><span class="pln">data</span><span class="pun">)</span></code></li></ol></pre>
<p>VGG核心模块的输入是数据层,<code>vgg_bn_drop</code> 定义了16层VGG结构,每层卷积后面引入BN层和Dropout层,详细的定义如下:</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> vgg_bn_drop</span><span class="pun">(</span><span class="pln">input</span><span class="pun">,</span><span class="pln"> num_channels</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="kwd">def</span><span class="pln"> conv_block</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> num_filter</span><span class="pun">,</span><span class="pln"> groups</span><span class="pun">,</span><span class="pln"> dropouts</span><span class="pun">,</span><span class="pln"> num_channels_</span><span class="pun">=</span><span class="kwd">None</span><span class="pun">):</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> img_conv_group</span><span class="pun">(</span></code></li><li class="L3"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">ipt</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> num_channels</span><span class="pun">=</span><span class="pln">num_channels_</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> pool_size</span><span class="pun">=</span><span class="lit">2</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> pool_stride</span><span class="pun">=</span><span class="lit">2</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> conv_num_filter</span><span class="pun">=[</span><span class="pln">num_filter</span><span class="pun">]</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> 
groups</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln"> conv_filter_size</span><span class="pun">=</span><span class="lit">3</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln"> conv_act</span><span class="pun">=</span><span class="typ">ReluActivation</span><span class="pun">(),</span></code></li><li class="L0"><code class="language-python"><span class="pln"> conv_with_batchnorm</span><span class="pun">=</span><span class="kwd">True</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln"> conv_batchnorm_drop_rate</span><span class="pun">=</span><span class="pln">dropouts</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> pool_type</span><span class="pun">=</span><span class="typ">MaxPooling</span><span class="pun">())</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln"> conv1 </span><span class="pun">=</span><span class="pln"> conv_block</span><span class="pun">(</span><span class="pln">input</span><span class="pun">,</span><span class="pln"> </span><span class="lit">64</span><span class="pun">,</span><span class="pln"> </span><span class="lit">2</span><span class="pun">,</span><span class="pln"> </span><span class="pun">[</span><span class="lit">0.3</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">],</span><span class="pln"> </span><span class="lit">3</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln"> conv2 </span><span class="pun">=</span><span class="pln"> conv_block</span><span class="pun">(</span><span class="pln">conv1</span><span class="pun">,</span><span class="pln"> </span><span class="lit">128</span><span class="pun">,</span><span class="pln"> 
</span><span class="lit">2</span><span class="pun">,</span><span class="pln"> </span><span class="pun">[</span><span class="lit">0.4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">])</span></code></li><li class="L6"><code class="language-python"><span class="pln"> conv3 </span><span class="pun">=</span><span class="pln"> conv_block</span><span class="pun">(</span><span class="pln">conv2</span><span class="pun">,</span><span class="pln"> </span><span class="lit">256</span><span class="pun">,</span><span class="pln"> </span><span class="lit">3</span><span class="pun">,</span><span class="pln"> </span><span class="pun">[</span><span class="lit">0.4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0.4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">])</span></code></li><li class="L7"><code class="language-python"><span class="pln"> conv4 </span><span class="pun">=</span><span class="pln"> conv_block</span><span class="pun">(</span><span class="pln">conv3</span><span class="pun">,</span><span class="pln"> </span><span class="lit">512</span><span class="pun">,</span><span class="pln"> </span><span class="lit">3</span><span class="pun">,</span><span class="pln"> </span><span class="pun">[</span><span class="lit">0.4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0.4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">])</span></code></li><li class="L8"><code class="language-python"><span class="pln"> conv5 </span><span class="pun">=</span><span class="pln"> conv_block</span><span class="pun">(</span><span class="pln">conv4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">512</span><span class="pun">,</span><span class="pln"> </span><span class="lit">3</span><span class="pun">,</span><span class="pln"> </span><span 
class="pun">[</span><span class="lit">0.4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0.4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">])</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"><span class="pln"> drop </span><span class="pun">=</span><span class="pln"> dropout_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">conv5</span><span class="pun">,</span><span class="pln"> dropout_rate</span><span class="pun">=</span><span class="lit">0.5</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> fc1 </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">drop</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="lit">512</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="typ">LinearActivation</span><span class="pun">())</span></code></li><li class="L2"><code class="language-python"><span class="pln"> bn </span><span class="pun">=</span><span class="pln"> batch_norm_layer</span><span class="pun">(</span></code></li><li class="L3"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">fc1</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="typ">ReluActivation</span><span class="pun">(),</span><span class="pln"> layer_attr</span><span class="pun">=</span><span class="typ">ExtraAttr</span><span class="pun">(</span><span class="pln">drop_rate</span><span class="pun">=</span><span class="lit">0.5</span><span class="pun">))</span></code></li><li class="L4"><code class="language-python"><span class="pln"> 
fc2 </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">bn</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="lit">512</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="typ">LinearActivation</span><span class="pun">())</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> fc2</span></code></li></ol></pre>
<p>2.1. 首先定义了一组卷积网络,即conv_block。卷积核大小为3x3,池化窗口大小为2x2,窗口滑动大小为2,groups决定每组VGG模块是几次连续的卷积操作,dropouts指定Dropout操作的概率。所使用的<code>img_conv_group</code>是在<code>paddle.trainer_config_helpers</code>中预定义的模块,由若干组 <code>Conv-&gt;BN-&gt;ReLu-&gt;Dropout</code> 和一组 <code>Pooling</code> 组成。</p>
<p>2.2. 五组卷积操作,即 5个conv_block。 第一、二组采用两次连续的卷积操作。第三、四、五组采用三次连续的卷积操作。每组最后一个卷积后面Dropout概率为0,即不使用Dropout操作。</p>
<p>2.3. 最后接两层512维的全连接。</p></li>
<li><p>定义分类器</p>
<p>通过上面VGG网络提取高层特征,然后经过全连接层映射到类别维度大小的向量,再通过Softmax归一化得到每个类别的概率,也可称作分类器。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">out </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">net</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">classdim</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="typ">SoftmaxActivation</span><span class="pun">())</span></code></li></ol></pre></li>
<li><p>定义损失函数和网络输出</p>
<p>在有监督训练中需要输入图像对应的类别信息,同样通过<code>data_layer</code>来定义。训练中采用多类交叉熵作为损失函数,并作为网络的输出,预测阶段定义网络的输出为分类器得到的概率信息。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_predict</span><span class="pun">:</span></code></li><li class="L1"><code class="language-python"><span class="pln"> lbl </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">"label"</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">classdim</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="pln"> cost </span><span class="pun">=</span><span class="pln"> classification_cost</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">out</span><span class="pun">,</span><span class="pln"> label</span><span class="pun">=</span><span class="pln">lbl</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">cost</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="kwd">else</span><span class="pun">:</span></code></li><li class="L5"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">out</span><span class="pun">)</span></code></li></ol></pre></li>
</ol><div class="md-section-divider"></div><h4 data-anchor-id="svv5" id="resnet-1">ResNet</h4><p data-anchor-id="g96g">ResNet模型的第1、3、4步和VGG模型相同,这里不再介绍。主要介绍第2步即CIFAR10数据集上ResNet核心模块。</p><pre data-anchor-id="2ywh"><code>net = resnet_cifar10(data, depth=56)
</code></pre><p data-anchor-id="zw4f">先介绍<code>resnet_cifar10</code>中的一些基本函数,再介绍网络连接过程。</p><ul data-anchor-id="4wcj">
<li><code>conv_bn_layer</code> : 带BN的卷积层。</li>
<li><code>shortcut</code> : 残差模块的"直连"路径,"直连"实际分两种形式:残差模块输入和输出特征通道数不等时,采用1x1卷积的升维操作;残差模块输入和输出通道相等时,采用直连操作。</li>
<li><code>basicblock</code> : 一个基础残差模块,即图9左边所示,由两组3x3卷积组成的路径和一条"直连"路径组成。</li>
<li><code>bottleneck</code> : 一个瓶颈残差模块,即图9右边所示,由上下1x1卷积和中间3x3卷积组成的路径和一条"直连"路径组成。</li>
<li><p><code>layer_warp</code> : 一组残差模块,由若干个残差模块堆积而成。每组中第一个残差模块滑动窗口大小与其他可以不同,以用来减少特征图在垂直和水平方向的大小。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> conv_bn_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln"> ch_out</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> filter_size</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> stride</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> padding</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> active_type</span><span class="pun">=</span><span class="typ">ReluActivation</span><span class="pun">(),</span></code></li><li class="L6"><code class="language-python"><span class="pln"> ch_in</span><span class="pun">=</span><span class="kwd">None</span><span class="pun">):</span></code></li><li class="L7"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> img_conv_layer</span><span class="pun">(</span></code></li><li class="L8"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">input</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln"> filter_size</span><span class="pun">=</span><span class="pln">filter_size</span><span class="pun">,</span></code></li><li class="L0"><code class="language-python"><span class="pln"> num_channels</span><span class="pun">=</span><span class="pln">ch_in</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln"> num_filters</span><span class="pun">=</span><span class="pln">ch_out</span><span class="pun">,</span></code></li><li 
class="L2"><code class="language-python"><span class="pln"> stride</span><span class="pun">=</span><span class="pln">stride</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> padding</span><span class="pun">=</span><span class="pln">padding</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> act</span><span class="pun">=</span><span class="typ">LinearActivation</span><span class="pun">(),</span></code></li><li class="L5"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="kwd">False</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> batch_norm_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">tmp</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="pln">active_type</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="kwd">def</span><span class="pln"> shortcut</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> n_in</span><span class="pun">,</span><span class="pln"> n_out</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">):</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="kwd">if</span><span class="pln"> n_in </span><span class="pun">!=</span><span class="pln"> n_out</span><span class="pun">:</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> conv_bn_layer</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> 
n_out</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">,</span><span class="pln"> </span><span class="typ">LinearActivation</span><span class="pun">())</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="kwd">else</span><span class="pun">:</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> ipt</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="kwd">def</span><span class="pln"> basicblock</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> ch_out</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">):</span></code></li><li class="L5"><code class="language-python"><span class="pln"> ch_in </span><span class="pun">=</span><span class="pln"> ipt</span><span class="pun">.</span><span class="pln">num_filters</span></code></li><li class="L6"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> conv_bn_layer</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> ch_out</span><span class="pun">,</span><span class="pln"> </span><span class="lit">3</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> conv_bn_layer</span><span class="pun">(</span><span class="pln">tmp</span><span class="pun">,</span><span class="pln"> ch_out</span><span 
class="pun">,</span><span class="pln"> </span><span class="lit">3</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">,</span><span class="pln"> </span><span class="typ">LinearActivation</span><span class="pun">())</span></code></li><li class="L8"><code class="language-python"><span class="pln"> short </span><span class="pun">=</span><span class="pln"> shortcut</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> ch_in</span><span class="pun">,</span><span class="pln"> ch_out</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> addto_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=[</span><span class="pln">tmp</span><span class="pun">,</span><span class="pln"> short</span><span class="pun">],</span><span class="pln"> act</span><span class="pun">=</span><span class="typ">ReluActivation</span><span class="pun">())</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="kwd">def</span><span class="pln"> bottleneck</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> ch_out</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">):</span></code></li><li class="L2"><code class="language-python"><span class="pln"> ch_in </span><span class="pun">=</span><span class="pln"> ipt</span><span class="pun">.</span><span class="pln">num_filters</span></code></li><li class="L3"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> conv_bn_layer</span><span class="pun">(</span><span 
class="pln">ipt</span><span class="pun">,</span><span class="pln"> ch_out</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> conv_bn_layer</span><span class="pun">(</span><span class="pln">tmp</span><span class="pun">,</span><span class="pln"> ch_out</span><span class="pun">,</span><span class="pln"> </span><span class="lit">3</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> conv_bn_layer</span><span class="pun">(</span><span class="pln">tmp</span><span class="pun">,</span><span class="pln"> ch_out </span><span class="pun">*</span><span class="pln"> </span><span class="lit">4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">,</span><span class="pln"> </span><span class="typ">LinearActivation</span><span class="pun">())</span></code></li><li class="L6"><code class="language-python"><span class="pln"> short </span><span class="pun">=</span><span class="pln"> shortcut</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> ch_in</span><span class="pun">,</span><span class="pln"> ch_out </span><span class="pun">*</span><span class="pln"> </span><span class="lit">4</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">)</span></code></li><li class="L7"><code 
class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> addto_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=[</span><span class="pln">tmp</span><span class="pun">,</span><span class="pln"> short</span><span class="pun">],</span><span class="pln"> act</span><span class="pun">=</span><span class="typ">ReluActivation</span><span class="pun">())</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="kwd">def</span><span class="pln"> layer_warp</span><span class="pun">(</span><span class="pln">block_func</span><span class="pun">,</span><span class="pln"> ipt</span><span class="pun">,</span><span class="pln"> features</span><span class="pun">,</span><span class="pln"> count</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">):</span></code></li><li class="L0"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> block_func</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> features</span><span class="pun">,</span><span class="pln"> stride</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> i </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="lit">1</span><span class="pun">,</span><span class="pln"> count</span><span class="pun">):</span></code></li><li class="L2"><code class="language-python"><span class="pln"> tmp </span><span class="pun">=</span><span class="pln"> block_func</span><span class="pun">(</span><span class="pln">tmp</span><span class="pun">,</span><span class="pln"> features</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)</span></code></li><li 
class="L3"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> tmp</span></code></li></ol></pre></li>
</ul><p data-anchor-id="piog"><code>resnet_cifar10</code> 的连接结构主要有以下几个过程。</p><ol data-anchor-id="s0uh">
<li>底层输入连接一层 <code>conv_bn_layer</code>,即带BN的卷积层。 </li>
<li>然后连接3组残差模块即下面配置3组 <code>layer_warp</code> ,每组采用图 10 左边残差模块组成。</li>
<li><p>最后对网络做均值池化并返回该层。 </p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> resnet_cifar10</span><span class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span><span class="pln"> depth</span><span class="pun">=</span><span class="lit">56</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># depth should be one of 20, 32, 44, 56, 110, 1202</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="kwd">assert</span><span class="pln"> </span><span class="pun">(</span><span class="pln">depth </span><span class="pun">-</span><span class="pln"> </span><span class="lit">2</span><span class="pun">)</span><span class="pln"> </span><span class="pun">%</span><span class="pln"> </span><span class="lit">6</span><span class="pln"> </span><span class="pun">==</span><span class="pln"> </span><span class="lit">0</span></code></li><li class="L3"><code class="language-python"><span class="pln"> n </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span><span class="pln">depth </span><span class="pun">-</span><span class="pln"> </span><span class="lit">2</span><span class="pun">)</span><span class="pln"> </span><span class="pun">/</span><span class="pln"> </span><span class="lit">6</span></code></li><li class="L4"><code class="language-python"><span class="pln"> nStages </span><span class="pun">=</span><span class="pln"> </span><span class="pun">{</span><span class="lit">16</span><span class="pun">,</span><span class="pln"> </span><span class="lit">64</span><span class="pun">,</span><span class="pln"> </span><span class="lit">128</span><span class="pun">}</span></code></li><li class="L5"><code class="language-python"><span class="pln"> conv1 </span><span class="pun">=</span><span class="pln"> conv_bn_layer</span><span 
class="pun">(</span><span class="pln">ipt</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> ch_in</span><span class="pun">=</span><span class="lit">3</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> ch_out</span><span class="pun">=</span><span class="lit">16</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln"> filter_size</span><span class="pun">=</span><span class="lit">3</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln"> stride</span><span class="pun">=</span><span class="lit">1</span><span class="pun">,</span></code></li><li class="L0"><code class="language-python"><span class="pln"> padding</span><span class="pun">=</span><span class="lit">1</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> res1 </span><span class="pun">=</span><span class="pln"> layer_warp</span><span class="pun">(</span><span class="pln">basicblock</span><span class="pun">,</span><span class="pln"> conv1</span><span class="pun">,</span><span class="pln"> </span><span class="lit">16</span><span class="pun">,</span><span class="pln"> n</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="pln"> res2 </span><span class="pun">=</span><span class="pln"> layer_warp</span><span class="pun">(</span><span class="pln">basicblock</span><span class="pun">,</span><span class="pln"> res1</span><span class="pun">,</span><span class="pln"> </span><span class="lit">32</span><span class="pun">,</span><span class="pln"> n</span><span class="pun">,</span><span class="pln"> </span><span class="lit">2</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> 
res3 </span><span class="pun">=</span><span class="pln"> layer_warp</span><span class="pun">(</span><span class="pln">basicblock</span><span class="pun">,</span><span class="pln"> res2</span><span class="pun">,</span><span class="pln"> </span><span class="lit">64</span><span class="pun">,</span><span class="pln"> n</span><span class="pun">,</span><span class="pln"> </span><span class="lit">2</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="pln"> pool </span><span class="pun">=</span><span class="pln"> img_pool_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">res3</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> pool_size</span><span class="pun">=</span><span class="lit">8</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> stride</span><span class="pun">=</span><span class="lit">1</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> pool_type</span><span class="pun">=</span><span class="typ">AvgPooling</span><span class="pun">())</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> pool</span></code></li></ol></pre></li>
</ol><p data-anchor-id="ah15">注意:除了第一层卷积层和最后一层全连接层之外,要求三组 <code>layer_warp</code> 总的含参层数能够被6整除,即 <code>resnet_cifar10</code> 的 depth 要满足 <code>(depth - 2) % 6 == 0</code>。</p><div class="md-section-divider"></div><h2 data-anchor-id="na01" id="模型训练">模型训练</h2><p data-anchor-id="okly">执行脚本 train.sh 进行模型训练, 其中指定配置文件、设备类型、线程个数、总共训练的轮数、模型存储路径等。</p><pre data-anchor-id="ef8j"><code>sh train.sh
</code></pre><p data-anchor-id="6k9d">脚本 <code>train.sh</code> 如下:</p><pre data-anchor-id="4002"><code>#cfg=models/resnet.py
cfg=models/vgg.py
output=output
log=train.log
paddle train \
--config=$cfg \
--use_gpu=true \
--trainer_count=1 \
--log_period=100 \
--num_passes=300 \
--save_dir=$output \
2&gt;&amp;1 | tee $log
</code></pre><ul data-anchor-id="2m07">
<li><code>--config=$cfg</code> : 指定配置文件,默认是 <code>models/vgg.py</code></li>
<li><code>--use_gpu=true</code> : 指定使用GPU训练,若使用CPU,设置为false。</li>
<li><code>--trainer_count=1</code> : 指定线程个数或GPU个数。</li>
<li><code>--log_period=100</code> : 指定日志打印的batch间隔。</li>
<li><code>--num_passes=300</code> : 指定训练的总轮数(pass数)。</li>
<li><code>--save_dir=$output</code> : 指定模型存储路径。</li>
</ul><p data-anchor-id="28l0">一轮训练log示例如下所示,经过1个pass, 训练集上平均error为0.79958 ,测试集上平均error为0.7858 。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="d5ea"><ol class="linenums"><li class="L0"><code class="language-text"><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">300</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">38400</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.07708</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">1.96158</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.81151</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.789297</span></code></li><li class="L1"><code class="language-text"><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">181</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Pass</span><span class="pun">=</span><span class="lit">0</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">391</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">50000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.03348</span><span class="pln"> </span><span class="typ">Eval</span><span 
class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.79958</span></code></li><li class="L2"><code class="language-text"><span class="typ">Tester</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">115</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Test</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">10000</span><span class="pln"> cost</span><span class="pun">=</span><span class="lit">1.99246</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.7858</span></code></li></ol></pre><p align="center" data-anchor-id="07wk">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/plot.png" width="400"><br>
图12. CIFAR10数据集上VGG模型的分类错误率
</p><div class="md-section-divider"></div><h2 data-anchor-id="1axb" id="模型应用">模型应用</h2><p data-anchor-id="a4xg">在训练完成后,模型会保存在路径 <code>output/pass-%05d</code> 下,例如第300个pass的模型会保存在路径 <code>output/pass-00299</code>。 可以使用脚本 <code>classify.py</code> 对图片进行预测或提取特征,注意该脚本默认使用模型配置为 <code>models/vgg.py</code></p><div class="md-section-divider"></div><h3 data-anchor-id="edho" id="预测">预测</h3><p data-anchor-id="qhar">可以按照下面方式预测图片的类别,默认使用GPU预测,如果使用CPU预测,在后面加参数 <code>-c</code>即可。</p><pre data-anchor-id="am9x"><code>python classify.py --job=predict --model=output/pass-00299 --data=image/dog.png # -c
</code></pre><p data-anchor-id="ihir">预测结果为:</p><pre data-anchor-id="131e"><code>Label of image/dog.png is: 5
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="zize" id="特征提取">特征提取</h3><p data-anchor-id="jtvl">可以按照下面方式对图片提取特征,和预测使用方式不同的是指定job类型为extract,并需要指定提取的层。<code>classify.py</code> 默认以第一层卷积特征为例提取特征,并画出了类似图13的可视化图。VGG模型的第一层卷积有64个通道,图13展示了每个通道的灰度图。</p><pre data-anchor-id="9eh3"><code>python classify.py --job=extract --model=output/pass-00299 --data=image/dog.png # -c
</code></pre><p align="center" data-anchor-id="ob85">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/image_classification/image/fea_conv0.png" width="500"><br>
图13. 卷积特征可视化图
</p><div class="md-section-divider"></div><h2 data-anchor-id="iawn" id="总结">总结</h2><p data-anchor-id="vz6e">传统图像分类方法由多个阶段构成,框架较为复杂,而端到端的CNN模型结构可一步到位,而且大幅度提升了分类准确率。本文我们首先介绍VGG、GoogleNet、ResNet三个经典的模型;然后基于CIFAR10数据集,介绍如何使用PaddlePaddle配置和训练CNN模型,尤其是VGG和ResNet模型;最后介绍如何使用PaddlePaddle的API接口对图片进行预测和特征提取。对于其他数据集比如ImageNet,配置和训练流程是同样的,大家可以自行进行实验。</p><div class="md-section-divider"></div><h2 data-anchor-id="5u0o" id="参考文献">参考文献</h2><p data-anchor-id="l46l">[1] D. G. Lowe, <a href="http://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf" target="_blank">Distinctive image features from scale-invariant keypoints</a>. IJCV, 60(2):91-110, 2004.</p><p data-anchor-id="9lzt">[2] N. Dalal, B. Triggs, <a href="http://vision.stanford.edu/teaching/cs231b_spring1213/papers/CVPR05_DalalTriggs.pdf" target="_blank">Histograms of Oriented Gradients for Human Detection</a>, Proc. IEEE Conf. Computer Vision and Pattern Recognition, 2005.</p><p data-anchor-id="mdjp">[3] Ahonen, T., Hadid, A., and Pietikäinen, M. (2006). <a href="http://ieeexplore.ieee.org/document/1717463/" target="_blank">Face description with local binary patterns: Application to face recognition</a>. PAMI, 28. </p><p data-anchor-id="841z">[4] J. Sivic, A. Zisserman, <a href="http://www.robots.ox.ac.uk/~vgg/publications/papers/sivic03.pdf" target="_blank">Video Google: A Text Retrieval Approach to Object Matching in Videos</a>, Proc. Ninth Int'l Conf. Computer Vision, pp. 1470-1478, 2003.</p><p data-anchor-id="y1r0">[5] B. Olshausen, D. Field, <a href="http://redwood.psych.cornell.edu/papers/olshausen_field_1997.pdf" target="_blank">Sparse Coding with an Overcomplete Basis Set: A Strategy Employed by V1?</a>, Vision Research, vol. 37, pp. 3311-3325, 1997.</p><p data-anchor-id="vlkn">[6] Wang, J., Yang, J., Yu, K., Lv, F., Huang, T., and Gong, Y. (2010). <a href="http://ieeexplore.ieee.org/abstract/document/5540018/" target="_blank">Locality-constrained Linear Coding for image classification</a>. 
In CVPR.</p><p data-anchor-id="tow3">[7] Perronnin, F., Sánchez, J., &amp; Mensink, T. (2010). <a href="http://dl.acm.org/citation.cfm?id=1888101" target="_blank">Improving the fisher kernel for large-scale image classification</a>. In ECCV (4).</p><p data-anchor-id="mflg">[8] Lin, Y., Lv, F., Cao, L., Zhu, S., Yang, M., Cour, T., Yu, K., and Huang, T. (2011). <a href="http://ieeexplore.ieee.org/document/5995477/" target="_blank">Large-scale image classification: Fast feature extraction and SVM training</a>. In CVPR.</p><p data-anchor-id="cim7">[9] Krizhevsky, A., Sutskever, I., and Hinton, G. (2012). <a href="http://www.cs.toronto.edu/~kriz/imagenet_classification_with_deep_convolutional.pdf" target="_blank">ImageNet classification with deep convolutional neural networks</a>. In NIPS.</p><p data-anchor-id="z1pg">[10] G.E. Hinton, N. Srivastava, A. Krizhevsky, I. Sutskever, and R.R. Salakhutdinov. <a href="https://arxiv.org/abs/1207.0580" target="_blank">Improving neural networks by preventing co-adaptation of feature detectors</a>. arXiv preprint arXiv:1207.0580, 2012.</p><p data-anchor-id="hef7">[11] K. Chatfield, K. Simonyan, A. Vedaldi, A. Zisserman. <a href="https://arxiv.org/abs/1405.3531" target="_blank">Return of the Devil in the Details: Delving Deep into Convolutional Nets</a>. BMVC, 2014.</p><p data-anchor-id="1mm1">[12] Szegedy, C., Liu, W., Jia, Y., Sermanet, P., Reed, S., Anguelov, D., Erhan, D., Vanhoucke, V., Rabinovich, A., <a href="https://arxiv.org/abs/1409.4842" target="_blank">Going deeper with convolutions</a>. In: CVPR. (2015)</p><p data-anchor-id="g962">[13] Lin, M., Chen, Q., and Yan, S. <a href="https://arxiv.org/abs/1312.4400" target="_blank">Network in network</a>. In Proc. ICLR, 2014.</p><p data-anchor-id="xzx2">[14] S. Ioffe and C. Szegedy. <a href="https://arxiv.org/abs/1502.03167" target="_blank">Batch normalization: Accelerating deep network training by reducing internal covariate shift</a>. 
In ICML, 2015.</p><p data-anchor-id="w8xg">[15] K. He, X. Zhang, S. Ren, J. Sun. <a href="https://arxiv.org/abs/1512.03385" target="_blank">Deep Residual Learning for Image Recognition</a>. CVPR 2016.</p><p data-anchor-id="gwoo">[16] Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z. <a href="https://arxiv.org/abs/1512.00567" target="_blank">Rethinking the inception architecture for computer vision</a>. In: CVPR. (2016).</p><p data-anchor-id="21y3">[17] Szegedy, C., Ioffe, S., Vanhoucke, V. <a href="https://arxiv.org/abs/1602.07261" target="_blank">Inception-v4, inception-resnet and the impact of residual connections on learning</a>. arXiv:1602.07261 (2016).</p><p data-anchor-id="qr78">[18] Everingham, M., Eslami, S. M. A., Van Gool, L., Williams, C. K. I., Winn, J. and Zisserman, A. The Pascal Visual Object Classes Challenge: A Retrospective. International Journal of Computer Vision, 111(1), 98-136, 2015.</p><p data-anchor-id="4wev">[19] He, K., Zhang, X., Ren, S., and Sun, J. <a href="https://arxiv.org/abs/1502.01852" target="_blank">Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification</a>. ArXiv e-prints, February 2015.</p><p data-anchor-id="sm90">[20] <a href="http://deeplearning.net/tutorial/lenet.html" target="_blank">http://deeplearning.net/tutorial/lenet.html</a></p><p data-anchor-id="vhmp">[21] <a href="https://www.cs.toronto.edu/~kriz/cifar.html" target="_blank">https://www.cs.toronto.edu/~kriz/cifar.html</a></p><p data-anchor-id="d6jv">[22] <a href="http://cs231n.github.io/classification/" target="_blank">http://cs231n.github.io/classification/</a></p><p data-anchor-id="scwx"><br> <br>
<img src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="知识共享许可协议"></p><p data-anchor-id="nxbu">本教程由<a href="http://book.paddlepaddle.org" target="_blank">PaddlePaddle</a>创作,采用<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">知识共享 署名-非商业性使用-相同方式共享 4.0 国际 许可协议</a>进行许可。</p></div>
</body>
</html>
......@@ -597,6 +597,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
......@@ -52,6 +52,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
......@@ -52,6 +52,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
<!DOCTYPE html>
<!-- Page title "语义角色标注" translates to "Semantic Role Labeling". -->
<html class="theme theme-white" lang="zh-Hans">
<head>
<meta charset="utf-8">
<!-- Let the page scale to the device width on mobile browsers. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>语义角色标注</title>
<link href="https://www.zybuluo.com/static/assets/template-theme-white.css" rel="stylesheet" media="screen">
<style>
#wmd-preview h1 {
  color: #0077bb; /* render top-level headings in blue */
}</style>
</head>
<body class="theme theme-white">
<div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMATHI-74" stroke-width="1" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path id="MJMAIN-2212" stroke-width="1" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path><path id="MJMAIN-31" stroke-width="1" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path id="MJMATHI-78" stroke-width="1" d="M52 289Q59 331 106 386T222 442Q257 442 286 424T329 379Q371 442 430 442Q467 442 494 420T522 361Q522 332 508 314T481 292T458 288Q439 288 427 299T415 328Q415 374 465 391Q454 404 425 404Q412 404 406 402Q368 386 350 336Q290 115 290 78Q290 50 306 38T341 26Q378 26 414 59T463 140Q466 150 469 151T485 153H489Q504 153 504 145Q504 144 502 134Q486 77 440 33T333 -11Q263 -11 227 52Q186 -10 133 -10H127Q78 -10 57 16T35 71Q35 103 54 123T99 143Q142 143 142 101Q142 81 130 66T107 46T94 41L91 40Q91 39 97 36T113 29T132 26Q168 26 194 71Q203 87 217 139T245 247T261 313Q266 340 266 352Q266 380 251 392T217 404Q177 404 142 372T93 290Q91 281 88 280T72 278H58Q52 284 52 
289Z"></path><path id="MJMATHI-50" stroke-width="1" d="M287 628Q287 635 230 637Q206 637 199 638T192 648Q192 649 194 659Q200 679 203 681T397 683Q587 682 600 680Q664 669 707 631T751 530Q751 453 685 389Q616 321 507 303Q500 302 402 301H307L277 182Q247 66 247 59Q247 55 248 54T255 50T272 48T305 46H336Q342 37 342 35Q342 19 335 5Q330 0 319 0Q316 0 282 1T182 2Q120 2 87 2T51 1Q33 1 33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM645 554Q645 567 643 575T634 597T609 619T560 635Q553 636 480 637Q463 637 445 637T416 636T404 636Q391 635 386 627Q384 621 367 550T332 412T314 344Q314 342 395 342H407H430Q542 342 590 392Q617 419 631 471T645 554Z"></path><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMATHI-58" stroke-width="1" d="M42 0H40Q26 0 26 11Q26 15 29 27Q33 41 36 43T55 46Q141 49 190 98Q200 108 306 224T411 342Q302 620 297 625Q288 636 234 637H206Q200 643 200 645T202 664Q206 677 212 683H226Q260 681 347 681Q380 681 408 681T453 682T473 682Q490 682 490 671Q490 670 488 658Q484 643 481 640T465 637Q434 634 411 620L488 426L541 485Q646 598 646 610Q646 628 622 635Q617 635 609 637Q594 637 594 648Q594 650 596 664Q600 677 606 683H618Q619 683 643 683T697 681T738 680Q828 680 837 683H845Q852 676 852 672Q850 647 840 637H824Q790 636 763 628T722 611T698 593L687 584Q687 585 592 480L505 384Q505 383 536 304T601 142T638 56Q648 47 699 46Q734 46 734 37Q734 35 732 23Q728 7 725 4T711 1Q708 1 678 1T589 2Q528 2 496 2T461 1Q444 1 444 10Q444 11 446 25Q448 35 450 39T455 44T464 46T480 47T506 54Q523 62 523 64Q522 64 476 181L429 299Q241 95 236 84Q232 76 232 72Q232 53 261 47Q262 47 267 47T273 46Q276 46 277 46T280 45T283 42T284 35Q284 26 282 19Q279 6 276 4T261 1Q258 1 243 1T201 2T142 2Q64 2 42 0Z"></path><path id="MJMAIN-7C" 
stroke-width="1" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path id="MJMATHI-59" stroke-width="1" d="M66 637Q54 637 49 637T39 638T32 641T30 647T33 664T42 682Q44 683 56 683Q104 680 165 680Q288 680 306 683H316Q322 677 322 674T320 656Q316 643 310 637H298Q242 637 242 624Q242 619 292 477T343 333L346 336Q350 340 358 349T379 373T411 410T454 461Q546 568 561 587T577 618Q577 634 545 637Q528 637 528 647Q528 649 530 661Q533 676 535 679T549 683Q551 683 578 682T657 680Q684 680 713 681T746 682Q763 682 763 673Q763 669 760 657T755 643Q753 637 734 637Q662 632 617 587Q608 578 477 424L348 273L322 169Q295 62 295 57Q295 46 363 46Q379 46 384 45T390 35Q390 33 388 23Q384 6 382 4T366 1Q361 1 324 1T232 2Q170 2 138 2T102 1Q84 1 84 9Q84 14 87 24Q88 27 89 30T90 35T91 39T93 42T96 44T101 45T107 45T116 46T129 46Q168 47 180 50T198 63Q201 68 227 171L252 274L129 623Q128 624 127 625T125 627T122 629T118 631T113 633T105 634T96 635T83 636T66 637Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path id="MJMAIN-2C" stroke-width="1" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path id="MJMAIN-32" stroke-width="1" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 
186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path id="MJMAIN-2E" stroke-width="1" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60Z"></path><path id="MJMATHI-6E" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-79" stroke-width="1" d="M21 287Q21 301 36 335T84 406T158 442Q199 442 224 419T250 355Q248 336 247 334Q247 331 231 288T198 191T182 105Q182 62 196 45T238 27Q261 27 281 38T312 61T339 94Q339 95 344 114T358 173T377 247Q415 397 419 404Q432 431 462 431Q475 431 483 424T494 412T496 403Q496 390 447 193T391 -23Q363 -106 294 -155T156 -205Q111 -205 77 -183T43 -117Q43 -95 50 -80T69 -58T89 -48T106 -45Q150 -45 150 -87Q150 -107 138 -122T115 -142T102 -147L99 -148Q101 -153 118 -160T152 -167H160Q177 -167 186 -165Q219 -156 247 -127T290 -65T313 -9T321 21L315 17Q309 13 296 6T270 -6Q250 -11 231 -11Q185 -11 150 11T104 82Q103 89 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-2217" stroke-width="1" d="M229 286Q216 420 216 436Q216 454 240 464Q241 464 245 464T251 465Q263 464 273 456T283 436Q283 419 277 356T270 286L328 328Q384 369 
389 372T399 375Q412 375 423 365T435 338Q435 325 425 315Q420 312 357 282T289 250L355 219L425 184Q434 175 434 161Q434 146 425 136T401 125Q393 125 383 131T328 171L270 213Q283 79 283 63Q283 53 276 44T250 35Q231 35 224 44T216 63Q216 80 222 143T229 213L171 171Q115 130 110 127Q106 124 100 124Q87 124 76 134T64 161Q64 166 64 169T67 175T72 181T81 188T94 195T113 204T138 215T170 230T210 250L74 315Q65 324 65 338Q65 353 74 363T98 374Q106 374 116 368T171 328L229 286Z"></path><path id="MJMAIN-61" stroke-width="1" d="M137 305T115 305T78 320T63 359Q63 394 97 421T218 448Q291 448 336 416T396 340Q401 326 401 309T402 194V124Q402 76 407 58T428 40Q443 40 448 56T453 109V145H493V106Q492 66 490 59Q481 29 455 12T400 -6T353 12T329 54V58L327 55Q325 52 322 49T314 40T302 29T287 17T269 6T247 -2T221 -8T190 -11Q130 -11 82 20T34 107Q34 128 41 147T68 188T116 225T194 253T304 268H318V290Q318 324 312 340Q290 411 215 411Q197 411 181 410T156 406T148 403Q170 388 170 359Q170 334 154 320ZM126 106Q126 75 150 51T209 26Q247 26 276 49T315 109Q317 116 318 175Q318 233 317 233Q309 233 296 232T251 223T193 203T147 166T126 106Z"></path><path id="MJMAIN-72" stroke-width="1" d="M36 46H50Q89 46 97 60V68Q97 77 97 91T98 122T98 161T98 203Q98 234 98 269T98 328L97 351Q94 370 83 376T38 385H20V408Q20 431 22 431L32 432Q42 433 60 434T96 436Q112 437 131 438T160 441T171 442H174V373Q213 441 271 441H277Q322 441 343 419T364 373Q364 352 351 337T313 322Q288 322 276 338T263 372Q263 381 265 388T270 400T273 405Q271 407 250 401Q234 393 226 386Q179 341 179 207V154Q179 141 179 127T179 101T180 81T180 66V61Q181 59 183 57T188 54T193 51T200 49T207 48T216 47T225 47T235 46T245 46H276V0H267Q249 3 140 3Q37 3 28 0H20V46H36Z"></path><path id="MJMAIN-67" stroke-width="1" d="M329 409Q373 453 429 453Q459 453 472 434T485 396Q485 382 476 371T449 360Q416 360 412 390Q410 404 415 411Q415 412 416 414V415Q388 412 363 393Q355 388 355 386Q355 385 359 381T368 369T379 351T388 325T392 292Q392 230 343 187T222 143Q172 143 123 171Q112 153 112 133Q112 98 138 81Q147 75 155 
75T227 73Q311 72 335 67Q396 58 431 26Q470 -13 470 -72Q470 -139 392 -175Q332 -206 250 -206Q167 -206 107 -175Q29 -140 29 -75Q29 -39 50 -15T92 18L103 24Q67 55 67 108Q67 155 96 193Q52 237 52 292Q52 355 102 398T223 442Q274 442 318 416L329 409ZM299 343Q294 371 273 387T221 404Q192 404 171 388T145 343Q142 326 142 292Q142 248 149 227T179 192Q196 182 222 182Q244 182 260 189T283 207T294 227T299 242Q302 258 302 292T299 343ZM403 -75Q403 -50 389 -34T348 -11T299 -2T245 0H218Q151 0 138 -6Q118 -15 107 -34T95 -74Q95 -84 101 -97T122 -127T170 -155T250 -167Q319 -167 361 -139T403 -75Z"></path><path id="MJMAIN-20" stroke-width="1"></path><path id="MJMAIN-6D" stroke-width="1" d="M41 46H55Q94 46 102 60V68Q102 77 102 91T102 122T103 161T103 203Q103 234 103 269T102 328V351Q99 370 88 376T43 385H25V408Q25 431 27 431L37 432Q47 433 65 434T102 436Q119 437 138 438T167 441T178 442H181V402Q181 364 182 364T187 369T199 384T218 402T247 421T285 437Q305 442 336 442Q351 442 364 440T387 434T406 426T421 417T432 406T441 395T448 384T452 374T455 366L457 361L460 365Q463 369 466 373T475 384T488 397T503 410T523 422T546 432T572 439T603 442Q729 442 740 329Q741 322 741 190V104Q741 66 743 59T754 49Q775 46 803 46H819V0H811L788 1Q764 2 737 2T699 3Q596 3 587 0H579V46H595Q656 46 656 62Q657 64 657 200Q656 335 655 343Q649 371 635 385T611 402T585 404Q540 404 506 370Q479 343 472 315T464 232V168V108Q464 78 465 68T468 55T477 49Q498 46 526 46H542V0H534L510 1Q487 2 460 2T422 3Q319 3 310 0H302V46H318Q379 46 379 62Q380 64 380 200Q379 335 378 343Q372 371 358 385T334 402T308 404Q263 404 229 370Q202 343 195 315T187 232V168V108Q187 78 188 68T191 55T200 49Q221 46 249 46H265V0H257L234 1Q210 2 183 2T145 3Q42 3 33 0H25V46H41Z"></path><path id="MJMAIN-78" stroke-width="1" d="M201 0Q189 3 102 3Q26 3 17 0H11V46H25Q48 47 67 52T96 61T121 78T139 96T160 122T180 150L226 210L168 288Q159 301 149 315T133 336T122 351T113 363T107 370T100 376T94 379T88 381T80 383Q74 383 44 385H16V431H23Q59 429 126 429Q219 429 229 431H237V385Q201 381 201 369Q201 367 211 
353T239 315T268 274L272 270L297 304Q329 345 329 358Q329 364 327 369T322 376T317 380T310 384L307 385H302V431H309Q324 428 408 428Q487 428 493 431H499V385H492Q443 385 411 368Q394 360 377 341T312 257L296 236L358 151Q424 61 429 57T446 50Q464 46 499 46H516V0H510H502Q494 1 482 1T457 2T432 2T414 3Q403 3 377 3T327 1L304 0H295V46H298Q309 46 320 51T331 63Q331 65 291 120L250 175Q249 174 219 133T185 88Q181 83 181 74Q181 63 188 55T206 46Q208 46 208 23V0H201Z"></path><path id="MJMATHI-70" stroke-width="1" d="M23 287Q24 290 25 295T30 317T40 348T55 381T75 411T101 433T134 442Q209 442 230 378L240 387Q302 442 358 442Q423 442 460 395T497 281Q497 173 421 82T249 -10Q227 -10 210 -4Q199 1 187 11T168 28L161 36Q160 35 139 -51T118 -138Q118 -144 126 -145T163 -148H188Q194 -155 194 -157T191 -175Q188 -187 185 -190T172 -194Q170 -194 161 -194T127 -193T65 -192Q-5 -192 -24 -194H-32Q-39 -187 -39 -183Q-37 -156 -26 -148H-6Q28 -147 33 -136Q36 -130 94 103T155 350Q156 355 156 364Q156 405 131 405Q109 405 94 377T71 316T59 280Q57 278 43 278H29Q23 284 23 287ZM178 102Q200 26 252 26Q282 26 310 49T356 107Q374 141 392 215T411 325V331Q411 405 350 405Q339 405 328 402T306 393T286 380T269 365T254 350T243 336T235 326L232 322Q232 321 229 308T218 264T204 212Q178 106 178 102Z"></path><path id="MJMATHI-5A" stroke-width="1" d="M58 8Q58 23 64 35Q64 36 329 334T596 635L586 637Q575 637 512 637H500H476Q442 637 420 635T365 624T311 598T266 548T228 469Q227 466 226 463T224 458T223 453T222 450L221 448Q218 443 202 443Q185 443 182 453L214 561Q228 606 241 651Q249 679 253 681Q256 683 487 683H718Q723 678 723 675Q723 673 717 649Q189 54 188 52L185 49H274Q369 50 377 51Q452 60 500 100T579 247Q587 272 590 277T603 282H607Q628 282 628 271Q547 5 541 2Q538 0 300 0H124Q58 0 58 8Z"></path><path id="MJMAIN-65" stroke-width="1" d="M28 218Q28 273 48 318T98 391T163 433T229 448Q282 448 320 430T378 380T406 316T415 245Q415 238 408 231H126V216Q126 68 226 36Q246 30 270 30Q312 30 342 62Q359 79 369 104L379 128Q382 131 395 131H398Q415 131 415 121Q415 117 412 
108Q393 53 349 21T250 -11Q155 -11 92 58T28 218ZM333 275Q322 403 238 411H236Q228 411 220 410T195 402T166 381T143 340T127 274V267H333V275Z"></path><path id="MJMAIN-70" stroke-width="1" d="M36 -148H50Q89 -148 97 -134V-126Q97 -119 97 -107T97 -77T98 -38T98 6T98 55T98 106Q98 140 98 177T98 243T98 296T97 335T97 351Q94 370 83 376T38 385H20V408Q20 431 22 431L32 432Q42 433 61 434T98 436Q115 437 135 438T165 441T176 442H179V416L180 390L188 397Q247 441 326 441Q407 441 464 377T522 216Q522 115 457 52T310 -11Q242 -11 190 33L182 40V-45V-101Q182 -128 184 -134T195 -145Q216 -148 244 -148H260V-194H252L228 -193Q205 -192 178 -192T140 -191Q37 -191 28 -194H20V-148H36ZM424 218Q424 292 390 347T305 402Q234 402 182 337V98Q222 26 294 26Q345 26 384 80T424 218Z"></path><path id="MJSZ2-2211" stroke-width="1" d="M60 948Q63 950 665 950H1267L1325 815Q1384 677 1388 669H1348L1341 683Q1320 724 1285 761Q1235 809 1174 838T1033 881T882 898T699 902H574H543H251L259 891Q722 258 724 252Q725 250 724 246Q721 243 460 -56L196 -356Q196 -357 407 -357Q459 -357 548 -357T676 -358Q812 -358 896 -353T1063 -332T1204 -283T1307 -196Q1328 -170 1348 -124H1388Q1388 -125 1381 -145T1356 -210T1325 -294L1267 -449L666 -450Q64 -450 61 -448Q55 -446 55 -439Q55 -437 57 -433L590 177Q590 178 557 222T452 366T322 544L56 909L55 924Q55 945 60 948Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-6A" stroke-width="1" d="M297 596Q297 627 318 644T361 661Q378 661 389 651T403 623Q403 595 384 576T340 557Q322 557 
310 567T297 596ZM288 376Q288 405 262 405Q240 405 220 393T185 362T161 325T144 293L137 279Q135 278 121 278H107Q101 284 101 286T105 299Q126 348 164 391T252 441Q253 441 260 441T272 442Q296 441 316 432Q341 418 354 401T367 348V332L318 133Q267 -67 264 -75Q246 -125 194 -164T75 -204Q25 -204 7 -183T-12 -137Q-12 -110 7 -91T53 -71Q70 -71 82 -81T95 -112Q95 -148 63 -167Q69 -168 77 -168Q111 -168 139 -140T182 -74L193 -32Q204 11 219 72T251 197T278 308T289 365Q289 372 288 376Z"></path><path id="MJMATHI-3BB" stroke-width="1" d="M166 673Q166 685 183 694H202Q292 691 316 644Q322 629 373 486T474 207T524 67Q531 47 537 34T546 15T551 6T555 2T556 -2T550 -11H482Q457 3 450 18T399 152L354 277L340 262Q327 246 293 207T236 141Q211 112 174 69Q123 9 111 -1T83 -12Q47 -12 47 20Q47 37 61 52T199 187Q229 216 266 252T321 306L338 322Q338 323 288 462T234 612Q214 657 183 657Q166 657 166 673Z"></path><path id="MJMAIN-2B" stroke-width="1" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path id="MJMATHI-6B" stroke-width="1" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path><path id="MJMATHI-3BC" stroke-width="1" d="M58 -216Q44 -216 34 -208T23 -186Q23 
-176 96 116T173 414Q186 442 219 442Q231 441 239 435T249 423T251 413Q251 401 220 279T187 142Q185 131 185 107V99Q185 26 252 26Q261 26 270 27T287 31T302 38T315 45T327 55T338 65T348 77T356 88T365 100L372 110L408 253Q444 395 448 404Q461 431 491 431Q504 431 512 424T523 412T525 402L449 84Q448 79 448 68Q448 43 455 35T476 26Q485 27 496 35Q517 55 537 131Q543 151 547 152Q549 153 557 153H561Q580 153 580 144Q580 138 575 117T555 63T523 13Q510 0 491 -8Q483 -10 467 -10Q446 -10 429 -4T402 11T385 29T376 44T374 51L368 45Q362 39 350 30T324 12T288 -4T246 -11Q199 -11 153 12L129 -85Q108 -167 104 -180T92 -202Q76 -216 58 -216Z"></path><path id="MJMATHI-73" stroke-width="1" d="M131 289Q131 321 147 354T203 415T300 442Q362 442 390 415T419 355Q419 323 402 308T364 292Q351 292 340 300T328 326Q328 342 337 354T354 372T367 378Q368 378 368 379Q368 382 361 388T336 399T297 405Q249 405 227 379T204 326Q204 301 223 291T278 274T330 259Q396 230 396 163Q396 135 385 107T352 51T289 7T195 -10Q118 -10 86 19T53 87Q53 126 74 143T118 160Q133 160 146 151T160 120Q160 94 142 76T111 58Q109 57 108 57T107 55Q108 52 115 47T146 34T201 27Q237 27 263 38T301 66T318 97T323 122Q323 150 302 164T254 181T195 196T148 231Q131 256 131 289Z"></path><path id="MJSZ4-239B" stroke-width="1" d="M837 1154Q843 1148 843 1145Q843 1141 818 1106T753 1002T667 841T574 604T494 299Q417 -84 417 -609Q417 -641 416 -647T411 -654Q409 -655 366 -655Q299 -655 297 -654Q292 -652 292 -643T291 -583Q293 -400 304 -242T347 110T432 470T574 813T785 1136Q787 1139 790 1142T794 1147T796 1150T799 1152T802 1153T807 1154T813 1154H819H837Z"></path><path id="MJSZ4-239D" stroke-width="1" d="M843 -635Q843 -638 837 -644H820Q801 -644 800 -643Q792 -635 785 -626Q684 -503 605 -363T473 -75T385 216T330 518T302 809T291 1093Q291 1144 291 1153T296 1164Q298 1165 366 1165Q409 1165 411 1164Q415 1163 416 1157T417 1119Q417 529 517 109T833 -617Q843 -631 843 -635Z"></path><path id="MJSZ4-239E" stroke-width="1" d="M31 1143Q31 1154 49 1154H59Q72 1154 75 1152T89 1136Q190 1013 269 873T401 
585T489 294T544 -8T572 -299T583 -583Q583 -634 583 -643T577 -654Q575 -655 508 -655Q465 -655 463 -654Q459 -653 458 -647T457 -609Q457 -58 371 340T100 1037Q87 1059 61 1098T31 1143Z"></path><path id="MJSZ4-23A0" stroke-width="1" d="M56 -644H50Q31 -644 31 -635Q31 -632 37 -622Q69 -579 100 -527Q286 -228 371 170T457 1119Q457 1161 462 1164Q464 1165 520 1165Q575 1165 577 1164Q582 1162 582 1153T583 1093Q581 910 570 752T527 400T442 40T300 -303T89 -626Q78 -640 75 -642T61 -644H56Z"></path><path id="MJMATHI-66" stroke-width="1" d="M118 -162Q120 -162 124 -164T135 -167T147 -168Q160 -168 171 -155T187 -126Q197 -99 221 27T267 267T289 382V385H242Q195 385 192 387Q188 390 188 397L195 425Q197 430 203 430T250 431Q298 431 298 432Q298 434 307 482T319 540Q356 705 465 705Q502 703 526 683T550 630Q550 594 529 578T487 561Q443 561 443 603Q443 622 454 636T478 657L487 662Q471 668 457 668Q445 668 434 658T419 630Q412 601 403 552T387 469T380 433Q380 431 435 431Q480 431 487 430T498 424Q499 420 496 407T491 391Q489 386 482 386T428 385H372L349 263Q301 15 282 -47Q255 -132 212 -173Q175 -205 139 -205Q107 -205 81 -186T55 -132Q55 -95 76 -78T118 -61Q162 -61 162 -103Q162 -122 151 -136T127 -157L118 -162Z"></path><path id="MJSZ1-2211" stroke-width="1" d="M61 748Q64 750 489 750H913L954 640Q965 609 976 579T993 533T999 516H979L959 517Q936 579 886 621T777 682Q724 700 655 705T436 710H319Q183 710 183 709Q186 706 348 484T511 259Q517 250 513 244L490 216Q466 188 420 134T330 27L149 -187Q149 -188 362 -188Q388 -188 436 -188T506 -189Q679 -189 778 -162T936 -43Q946 -27 959 6H999L913 -249L489 -250Q65 -250 62 -248Q56 -246 56 -239Q56 -234 118 -161Q186 -81 245 -11L428 206Q428 207 242 462L57 717L56 728Q56 744 61 748Z"></path><path id="MJMATHI-57" stroke-width="1" d="M436 683Q450 683 486 682T553 680Q604 680 638 681T677 682Q695 682 695 674Q695 670 692 659Q687 641 683 639T661 637Q636 636 621 632T600 624T597 615Q597 603 613 377T629 138L631 141Q633 144 637 151T649 170T666 200T690 241T720 295T759 362Q863 546 877 572T892 604Q892 619 873 
628T831 637Q817 637 817 647Q817 650 819 660Q823 676 825 679T839 682Q842 682 856 682T895 682T949 681Q1015 681 1034 683Q1048 683 1048 672Q1048 666 1045 655T1038 640T1028 637Q1006 637 988 631T958 617T939 600T927 584L923 578L754 282Q586 -14 585 -15Q579 -22 561 -22Q546 -22 542 -17Q539 -14 523 229T506 480L494 462Q472 425 366 239Q222 -13 220 -15T215 -19Q210 -22 197 -22Q178 -22 176 -15Q176 -12 154 304T131 622Q129 631 121 633T82 637H58Q51 644 51 648Q52 671 64 683H76Q118 680 176 680Q301 680 313 683H323Q329 677 329 674T327 656Q322 641 318 637H297Q236 634 232 620Q262 160 266 136L501 550L499 587Q496 629 489 632Q483 636 447 637Q428 637 422 639T416 648Q416 650 418 660Q419 664 420 669T421 676T424 680T428 682T436 683Z"></path><path id="MJMATHI-3C9" stroke-width="1" d="M495 384Q495 406 514 424T555 443Q574 443 589 425T604 364Q604 334 592 278T555 155T483 38T377 -11Q297 -11 267 66Q266 68 260 61Q201 -11 125 -11Q15 -11 15 139Q15 230 56 325T123 434Q135 441 147 436Q160 429 160 418Q160 406 140 379T94 306T62 208Q61 202 61 187Q61 124 85 100T143 76Q201 76 245 129L253 137V156Q258 297 317 297Q348 297 348 261Q348 243 338 213T318 158L308 135Q309 133 310 129T318 115T334 97T358 83T393 76Q456 76 501 148T546 274Q546 305 533 325T508 357T495 384Z"></path><path id="MJMATHI-44" stroke-width="1" d="M287 628Q287 635 230 637Q207 637 200 638T193 647Q193 655 197 667T204 682Q206 683 403 683Q570 682 590 682T630 676Q702 659 752 597T803 431Q803 275 696 151T444 3L430 1L236 0H125H72Q48 0 41 2T33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM703 469Q703 507 692 537T666 584T629 613T590 629T555 636Q553 636 541 636T512 636T479 637H436Q392 637 386 627Q384 623 313 339T242 52Q242 48 253 48T330 47Q335 47 349 47T373 46Q499 46 581 128Q617 164 640 212T683 339T703 469Z"></path><path id="MJMAIN-5B" stroke-width="1" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path id="MJMATHI-4E" stroke-width="1" d="M234 637Q231 637 226 637Q201 637 196 638T191 649Q191 676 202 682Q204 683 299 
683Q376 683 387 683T401 677Q612 181 616 168L670 381Q723 592 723 606Q723 633 659 637Q635 637 635 648Q635 650 637 660Q641 676 643 679T653 683Q656 683 684 682T767 680Q817 680 843 681T873 682Q888 682 888 672Q888 650 880 642Q878 637 858 637Q787 633 769 597L620 7Q618 0 599 0Q585 0 582 2Q579 5 453 305L326 604L261 344Q196 88 196 79Q201 46 268 46H278Q284 41 284 38T282 19Q278 6 272 0H259Q228 2 151 2Q123 2 100 2T63 2T46 1Q31 1 31 10Q31 14 34 26T39 40Q41 46 62 46Q130 49 150 85Q154 91 221 362L289 634Q287 635 234 637Z"></path><path id="MJMAIN-5D" stroke-width="1" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path><path id="MJMATHI-4C" stroke-width="1" d="M228 637Q194 637 192 641Q191 643 191 649Q191 673 202 682Q204 683 217 683Q271 680 344 680Q485 680 506 683H518Q524 677 524 674T522 656Q517 641 513 637H475Q406 636 394 628Q387 624 380 600T313 336Q297 271 279 198T252 88L243 52Q243 48 252 48T311 46H328Q360 46 379 47T428 54T478 72T522 106T564 161Q580 191 594 228T611 270Q616 273 628 273H641Q647 264 647 262T627 203T583 83T557 9Q555 4 553 3T537 0T494 -1Q483 -1 418 -1T294 0H116Q32 0 32 10Q32 17 34 24Q39 43 44 45Q48 46 59 46H65Q92 46 125 49Q139 52 144 61Q147 65 216 339T285 628Q285 635 228 637Z"></path><path id="MJMAIN-6C" stroke-width="1" d="M42 46H56Q95 46 103 60V68Q103 77 103 91T103 124T104 167T104 217T104 272T104 329Q104 366 104 407T104 482T104 542T103 586T103 603Q100 622 89 628T44 637H26V660Q26 683 28 683L38 684Q48 685 67 686T104 688Q121 689 141 690T171 693T182 694H185V379Q185 62 186 60Q190 52 198 49Q219 46 247 46H263V0H255L232 1Q209 2 183 2T145 3T107 3T57 1L34 0H26V46H42Z"></path><path id="MJMAIN-6F" stroke-width="1" d="M28 214Q28 309 93 378T250 448Q340 448 405 380T471 215Q471 120 407 55T250 -10Q153 -10 91 57T28 214ZM250 30Q372 30 372 193V225V250Q372 272 371 288T364 326T348 362T317 390T268 410Q263 411 252 411Q222 411 195 399Q152 377 139 338T126 246V226Q126 130 145 91Q177 30 250 30Z"></path><path id="MJSZ2-220F" stroke-width="1" d="M220 812Q220 813 218 819T214 829T208 840T199 853T185 
866T166 878T140 887T107 893T66 896H56V950H1221V896H1211Q1080 896 1058 812V-311Q1076 -396 1211 -396H1221V-450H725V-396H735Q864 -396 888 -314Q889 -312 889 -311V896H388V292L389 -311Q405 -396 542 -396H552V-450H56V-396H66Q195 -396 219 -314Q220 -312 220 -311V812Z"></path><path id="MJMATHI-6D" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T88 425T132 442T175 435T205 417T221 395T229 376L231 369Q231 367 232 367L243 378Q303 442 384 442Q401 442 415 440T441 433T460 423T475 411T485 398T493 385T497 373T500 364T502 357L510 367Q573 442 659 442Q713 442 746 415T780 336Q780 285 742 178T704 50Q705 36 709 31T724 26Q752 26 776 56T815 138Q818 149 821 151T837 153Q857 153 857 145Q857 144 853 130Q845 101 831 73T785 17T716 -10Q669 -10 648 17T627 73Q627 92 663 193T700 345Q700 404 656 404H651Q565 404 506 303L499 291L466 157Q433 26 428 16Q415 -11 385 -11Q372 -11 364 -4T353 8T350 18Q350 29 384 161L420 307Q423 322 423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 181Q151 335 151 342Q154 357 154 369Q154 405 129 405Q107 405 92 377T69 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJSZ4-28" stroke-width="1" d="M758 -1237T758 -1240T752 -1249H736Q718 -1249 717 -1248Q711 -1245 672 -1199Q237 -706 237 251T672 1700Q697 1730 716 1749Q718 1750 735 1750H752Q758 1744 758 1741Q758 1737 740 1713T689 1644T619 1537T540 1380T463 1176Q348 802 348 251Q348 -242 441 -599T744 -1218Q758 -1237 758 -1240Z"></path><path id="MJSZ4-29" stroke-width="1" d="M33 1741Q33 1750 51 1750H60H65Q73 1750 81 1743T119 1700Q554 1207 554 251Q554 -707 119 -1199Q76 -1250 66 -1250Q65 -1250 62 -1250T56 -1249Q55 -1249 53 -1249T49 -1250Q33 -1250 33 -1239Q33 -1236 50 -1214T98 -1150T163 -1052T238 -910T311 -727Q443 -335 443 251Q443 402 436 532T405 831T339 1142T224 1438T50 1716Q33 1737 33 1741Z"></path><path id="MJMATHI-43" stroke-width="1" d="M50 252Q50 367 117 473T286 641T490 704Q580 704 633 653Q642 643 648 636T656 626L657 623Q660 623 684 649Q691 655 
699 663T715 679T725 690L740 705H746Q760 705 760 698Q760 694 728 561Q692 422 692 421Q690 416 687 415T669 413H653Q647 419 647 422Q647 423 648 429T650 449T651 481Q651 552 619 605T510 659Q484 659 454 652T382 628T299 572T226 479Q194 422 175 346T156 222Q156 108 232 58Q280 24 350 24Q441 24 512 92T606 240Q610 253 612 255T628 257Q648 257 648 248Q648 243 647 239Q618 132 523 55T319 -22Q206 -22 128 53T50 252Z"></path><path id="MJMAIN-2225" stroke-width="1" d="M133 736Q138 750 153 750Q164 750 170 739Q172 735 172 250T170 -239Q164 -250 152 -250Q144 -250 138 -244L137 -243Q133 -241 133 -179T132 250Q132 731 133 736ZM329 739Q334 750 346 750Q353 750 361 744L362 743Q366 741 366 679T367 250T367 -178T362 -243L361 -244Q355 -250 347 -250Q335 -250 329 -239Q327 -235 327 250T329 739Z"></path><path id="MJMAIN-AF" stroke-width="1" d="M69 544V590H430V544H69Z"></path><path id="MJMAIN-41" stroke-width="1" d="M255 0Q240 3 140 3Q48 3 39 0H32V46H47Q119 49 139 88Q140 91 192 245T295 553T348 708Q351 716 366 716H376Q396 715 400 709Q402 707 508 390L617 67Q624 54 636 51T687 46H717V0H708Q699 3 581 3Q458 3 437 0H427V46H440Q510 46 510 64Q510 66 486 138L462 209H229L209 150Q189 91 189 85Q189 72 209 59T259 46H264V0H255ZM447 255L345 557L244 256Q244 255 345 255H447Z"></path><path id="MJMAIN-6E" stroke-width="1" d="M41 46H55Q94 46 102 60V68Q102 77 102 91T102 122T103 161T103 203Q103 234 103 269T102 328V351Q99 370 88 376T43 385H25V408Q25 431 27 431L37 432Q47 433 65 434T102 436Q119 437 138 438T167 441T178 442H181V402Q181 364 182 364T187 369T199 384T218 402T247 421T285 437Q305 442 336 442Q450 438 463 329Q464 322 464 190V104Q464 66 466 59T477 49Q498 46 526 46H542V0H534L510 1Q487 2 460 2T422 3Q319 3 310 0H302V46H318Q379 46 379 62Q380 64 380 200Q379 335 378 343Q372 371 358 385T334 402T308 404Q263 404 229 370Q202 343 195 315T187 232V168V108Q187 78 188 68T191 55T200 49Q221 46 249 46H265V0H257L234 1Q210 2 183 2T145 3Q42 3 33 0H25V46H41Z"></path><path id="MJMAIN-74" stroke-width="1" d="M27 422Q80 426 109 478T141 
600V615H181V431H316V385H181V241Q182 116 182 100T189 68Q203 29 238 29Q282 29 292 100Q293 108 293 146V181H333V146V134Q333 57 291 17Q264 -10 221 -10Q187 -10 162 2T124 33T105 68T98 100Q97 107 97 248V385H18V422H27Z"></path><path id="MJMAIN-54" stroke-width="1" d="M36 443Q37 448 46 558T55 671V677H666V671Q667 666 676 556T685 443V437H645V443Q645 445 642 478T631 544T610 593Q593 614 555 625Q534 630 478 630H451H443Q417 630 414 618Q413 616 413 339V63Q420 53 439 50T528 46H558V0H545L361 3Q186 1 177 0H164V46H194Q264 46 283 49T309 63V339V550Q309 620 304 625T271 630H244H224Q154 630 119 601Q101 585 93 554T81 486T76 443V437H36V443Z"></path><path id="MJMAIN-69" stroke-width="1" d="M69 609Q69 637 87 653T131 669Q154 667 171 652T188 609Q188 579 171 564T129 549Q104 549 87 564T69 609ZM247 0Q232 3 143 3Q132 3 106 3T56 1L34 0H26V46H42Q70 46 91 49Q100 53 102 60T104 102V205V293Q104 345 102 359T88 378Q74 385 41 385H30V408Q30 431 32 431L42 432Q52 433 70 434T106 436Q123 437 142 438T171 441T182 442H185V62Q190 52 197 50T232 46H255V0H247Z"></path><path id="MJMAIN-4C" stroke-width="1" d="M128 622Q121 629 117 631T101 634T58 637H25V683H36Q48 680 182 680Q324 680 348 683H360V637H333Q273 637 258 635T233 622L232 342V129Q232 57 237 52Q243 47 313 47Q384 47 410 53Q470 70 498 110T536 221Q536 226 537 238T540 261T542 272T562 273H582V268Q580 265 568 137T554 5V0H25V46H58Q100 47 109 49T128 61V622Z"></path><path id="MJMAIN-63" stroke-width="1" d="M370 305T349 305T313 320T297 358Q297 381 312 396Q317 401 317 402T307 404Q281 408 258 408Q209 408 178 376Q131 329 131 219Q131 137 162 90Q203 29 272 29Q313 29 338 55T374 117Q376 125 379 127T395 129H409Q415 123 415 120Q415 116 411 104T395 71T366 33T318 2T249 -11Q163 -11 99 53T34 214Q34 318 99 383T250 448T370 421T404 357Q404 334 387 320Z"></path><path id="MJMAIN-50" stroke-width="1" d="M130 622Q123 629 119 631T103 634T60 637H27V683H214Q237 683 276 683T331 684Q419 684 471 671T567 616Q624 563 624 489Q624 421 573 372T451 307Q429 302 328 301H234V181Q234 62 237 58Q245 47 304 
46H337V0H326Q305 3 182 3Q47 3 38 0H27V46H60Q102 47 111 49T130 61V622ZM507 488Q507 514 506 528T500 564T483 597T450 620T397 635Q385 637 307 637H286Q237 637 234 628Q231 624 231 483V342H302H339Q390 342 423 349T481 382Q507 411 507 488Z"></path><path id="MJMAIN-64" stroke-width="1" d="M376 495Q376 511 376 535T377 568Q377 613 367 624T316 637H298V660Q298 683 300 683L310 684Q320 685 339 686T376 688Q393 689 413 690T443 693T454 694H457V390Q457 84 458 81Q461 61 472 55T517 46H535V0Q533 0 459 -5T380 -11H373V44L365 37Q307 -11 235 -11Q158 -11 96 50T34 215Q34 315 97 378T244 442Q319 442 376 393V495ZM373 342Q328 405 260 405Q211 405 173 369Q146 341 139 305T131 211Q131 155 138 120T173 59Q203 26 251 26Q322 26 373 103V342Z"></path></defs></svg></div><div id="wmd-preview" class="wmd-preview wmd-preview-full-reader"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="1ybr" id="语义角色标注">语义角色标注</h1><div class="md-section-divider"></div><h2 data-anchor-id="w9c4" id="背景介绍">背景介绍</h2><p data-anchor-id="iidv">自然语言分析技术大致分为三个层面:词法分析、句法分析和语义分析。语义角色标注是实现浅层语义分析的一种方式。在一个句子中,谓词是对主语的陈述或说明,指出“做什么”、“是什么”或“怎么样”,代表了一个事件的核心,跟谓词搭配的名词称为论元。语义角色是指论元在动词所指事件中担任的角色。主要有:施事者(Agent)、受事者(Patient)、客体(Theme)、经验者(Experiencer)、受益者(Beneficiary)、工具(Instrument)、处所(Location)、目标(Goal)和来源(Source)等。</p><p data-anchor-id="i34j">请看下面的例子,“遇到” 是谓词(Predicate,通常简写为“Pred”),“小明”是施事者(Agent),“小红”是受事者(Patient),“昨天” 是事件发生的时间(Time),“公园”是事情发生的地点(Location)。</p><div class="md-section-divider"></div><p data-anchor-id="evvp"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-54-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -881.1077362650129 34267.88018219112 1480.7162216130375" style="width: 79.575ex; height: 3.475ex; vertical-align: -1.506ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" 
transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B"></use><g transform="translate(278,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><g transform="translate(1075,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="1872" y="0"></use><g transform="translate(2151,-373)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-41"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-67" x="750" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65" x="1251" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6E" x="1695" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-74" x="2252" y="0"></use></g><g transform="translate(4892,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B"></use><g transform="translate(278,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><g transform="translate(1075,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="1872" y="0"></use><g transform="translate(2151,-334)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-54"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-69" x="722" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6D" x="1001" 
y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65" x="1834" y="0"></use></g></g><g transform="translate(9423,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B"></use><g transform="translate(278,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><g transform="translate(1075,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="1872" y="0"></use><g transform="translate(2151,-334)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-54"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-69" x="722" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6D" x="1001" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65" x="1834" y="0"></use></g></g><g transform="translate(13953,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B" x="797" y="0"></use><g transform="translate(1075,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><g transform="translate(1872,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="2669" y="0"></use><g transform="translate(2948,-340)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-4C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6F" x="625" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-63" x="1126" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-61" x="1570" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-74" x="2071" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-69" x="2460" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6F" x="2739" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6E" x="3239" y="0"></use></g></g><g transform="translate(20797,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B"></use><g transform="translate(278,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><g transform="translate(1075,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="1872" y="0"></use><g transform="translate(2151,-352)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-72" x="681" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65" x="1074" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-64" x="1518" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-69" x="2075" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-63" x="2353" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-61" x="2798" y="0"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-74" x="3298" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65" x="3688" y="0"></use></g></g><g transform="translate(27181,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B" x="797" y="0"></use><g transform="translate(1075,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><g transform="translate(1872,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="2669" y="0"></use><g transform="translate(2948,-341)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-61" x="681" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-74" x="1182" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-69" x="1571" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65" x="1850" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6E" x="2294" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-74" x="2851" y="0"></use></g></g><g transform="translate(33470,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)"></text></g></g></svg></span></div><script type="math/tex; mode=display" 
id="MathJax-Element-54">\mbox{[小明]}_{\mbox{Agent}}\mbox{[昨天]}_{\mbox{Time}}\mbox{[晚上]}_{\mbox{Time}}\mbox{在[公园]}_{\mbox{Location}}\mbox{[遇到]}_{\mbox{Predicate}}\mbox{了[小红]}_{\mbox{Patient}}\mbox{。}</script></p><p data-anchor-id="l79q">语义角色标注(Semantic Role Labeling,SRL)以句子的谓词为中心,不对句子所包含的语义信息进行深入分析,只分析句子中各成分与谓词之间的关系,即句子的谓词(Predicate)- 论元(Argument)结构,并用语义角色来描述这些结构关系,是许多自然语言理解任务(如信息抽取,篇章分析,深度问答等)的一个重要中间步骤。在研究中一般都假定谓词是给定的,所要做的就是找出给定谓词的各个论元和它们的语义角色。</p><p data-anchor-id="5rx9">传统的SRL系统大多建立在句法分析基础之上,通常包括5个流程:</p><ol data-anchor-id="q4kx">
<li>构建一棵句法分析树,例如,图1是对上面例子进行依存句法分析得到的一棵句法树。</li>
<li>从句法树上识别出给定谓词的候选论元。</li>
<li>候选论元剪除:一个句子中的候选论元可能很多,候选论元剪除就是从大量的候选项中剪除那些最不可能成为论元的候选项。</li>
<li>论元识别:这个过程是从上一步剪除之后的候选中判断哪些是真正的论元,通常当做一个二分类问题来解决。</li>
<li>对第4步的结果,通过多分类得到论元的语义角色标签。可以看到,句法分析是基础,并且后续步骤常常会构造一些人工特征,这些特征往往也来自句法分析。</li>
</ol><p align="center" data-anchor-id="m7gn">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/label_semantic_roles/image/dependency_parsing.png" alt="依存句法分析句法树示例" width="800"><br>
图1. 依存句法分析句法树示例
</p><p data-anchor-id="dc3o">然而,完全句法分析需要确定句子所包含的全部句法信息,并确定句子各成分之间的关系,是一个非常困难的任务,目前技术下的句法分析准确率并不高,句法分析的细微错误都会导致SRL的错误。为了降低问题的复杂度,同时获得一定的句法结构信息,“浅层句法分析”的思想应运而生。浅层句法分析也称为部分句法分析(partial parsing)或语块划分(chunking)。和完全句法分析得到一棵完整的句法树不同,浅层句法分析只需要识别句子中某些结构相对简单的独立成分,例如:动词短语,这些被识别出来的结构称为语块。为了回避 “无法获得准确率较高的句法树” 所带来的困难,一些研究[<a href="#参考文献">1</a>]也提出了基于语块(chunk)的SRL方法。基于语块的SRL方法将SRL作为一个序列标注问题来解决。序列标注任务一般都会采用BIO表示方式来定义序列标注的标签集,我们先来介绍这种表示方法。在BIO表示法中,B代表语块的开始(Begin),I代表语块的中间(Inside),O代表位于语块之外(Outside)。通过B、I、O 三种标记将不同的语块赋予不同的标签,例如:对于一个角色为A的论元,将它所包含的第一个语块赋予标签B-A,将它所包含的其它语块赋予标签I-A,不属于任何论元的语块赋予标签O。</p><p data-anchor-id="8gjd">我们继续以上面的这句话为例,图2展示了BIO表示方法。</p><p align="center" data-anchor-id="a5w5">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/label_semantic_roles/image/bio_example.png" alt="BIO标注方法示例" width="900"><br>
图2. BIO标注方法示例
</p><p data-anchor-id="uum5">从上面的例子可以看到,根据序列标注结果可以直接得到论元的语义角色标注结果,是一个相对简单的过程。这种简单性体现在:(1)依赖浅层句法分析,降低了句法分析的要求和难度;(2)没有了候选论元剪除这一步骤;(3)论元的识别和论元标注是同时实现的。这种一体化处理论元识别和论元标注的方法,简化了流程,降低了错误累积的风险,往往能够取得更好的结果。</p><p data-anchor-id="6x8q">与基于语块的SRL方法类似,在本教程中我们也将SRL看作一个序列标注问题,不同的是,我们只依赖输入文本序列,不依赖任何额外的语法解析结果或是复杂的人造特征,利用深度神经网络构建一个端到端学习的SRL系统。我们以<a href="http://www.cs.upc.edu/~srlconll/" target="_blank">CoNLL-2004 and CoNLL-2005 Shared Tasks</a>任务中SRL任务的公开数据集为例,实践下面的任务:给定一句话和这句话里的一个谓词,通过序列标注的方式,从句子中找到谓词对应的论元,同时标注它们的语义角色。</p><div class="md-section-divider"></div><h2 data-anchor-id="h3qc" id="模型概览">模型概览</h2><p data-anchor-id="4a4g">循环神经网络(Recurrent Neural Network)是一种对序列建模的重要模型,在自然语言处理任务中有着广泛的应用。不同于前馈神经网络(Feed-forward Neural Network),RNN能够处理输入之间前后关联的问题。LSTM是RNN的一种重要变种,常用来学习长序列中蕴含的长程依赖关系,我们在<a href="https://github.com/PaddlePaddle/book/tree/develop/understand_sentiment" target="_blank">情感分析</a>一篇中已经介绍过,这一篇中我们依然利用LSTM来解决SRL问题。</p><div class="md-section-divider"></div><h3 data-anchor-id="4qpw" id="栈式循环神经网络stacked-recurrent-neural-network">栈式循环神经网络(Stacked Recurrent Neural Network)</h3><p data-anchor-id="5gfu">深层网络有助于形成层次化特征,网络上层在下层已经学习到的初级特征基础上,形成更复杂的高级特征。尽管LSTM沿时间轴展开后等价于一个非常“深”的前馈网络,但由于LSTM各个时间步参数共享,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-55-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 2084.9444444444443 719.103370696049" style="width: 4.865ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="583" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1584" y="0"></use></g></svg></span><script type="math/tex" 
id="MathJax-Element-55">t-1</script>时刻状态到<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-56-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-56">t</script>时刻的映射,始终只经过了一次非线性映射,也就是说单层LSTM对状态转移的建模是 “浅” 的。堆叠多个LSTM单元,令前一个LSTM<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-57-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-57">t</script>时刻的输出,成为下一个LSTM单元<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-58-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-58">t</script>时刻的输入,帮助我们构建起一个深层网络,我们把它称为第一个版本的栈式循环神经网络。深层网络提高了模型拟合复杂模式的能力,能够更好地建模跨不同时间步的模式[<a 
href="#参考文献">2</a>]。</p><p data-anchor-id="8qg8">然而,训练一个深层LSTM网络并非易事。纵向堆叠多个LSTM单元可能遇到梯度在纵向深度上传播受阻的问题。通常,堆叠4层LSTM单元可以正常训练,当层数达到4~8层时,会出现性能衰减,这时必须考虑一些新的结构以保证梯度纵向顺畅传播,这是训练深层LSTM网络必须解决的问题。我们可以借鉴LSTM解决 “梯度消失梯度爆炸” 问题的智慧之一:在记忆单元(Memory Cell)这条信息传播的路线上没有非线性映射,当梯度反向传播时既不会衰减、也不会爆炸。因此,深层LSTM模型也可以在纵向上添加一条保证梯度顺畅传播的路径。</p><p data-anchor-id="hgx2">一个LSTM单元完成的运算可以被分为三部分:(1)输入到隐层的映射(input-to-hidden) :每个时间步输入信息<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-59-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 572.5 495.10337069604896" style="width: 1.274ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use></g></svg></span><script type="math/tex" id="MathJax-Element-59">x</script>会首先经过一个矩阵映射,再作为遗忘门,输入门,记忆单元,输出门的输入,注意,这一次映射没有引入非线性激活;(2)隐层到隐层的映射(hidden-to-hidden):这一步是LSTM计算的主体,包括遗忘门,输入门,记忆单元更新,输出门的计算;(3)隐层到输出的映射(hidden-to-output):通常是简单地对隐层向量进行激活。我们在第一个版本的栈式网络的基础上,加入一条新的路径:除上一层LSTM输出之外,将前层LSTM的输入到隐层的映射作为一个新的输入,同时加入一个线性映射去学习一个新的变换。</p><p data-anchor-id="xrg6">图3是最终得到的栈式循环神经网络结构示意图。</p><p align="center" data-anchor-id="jqz2">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/label_semantic_roles/image/stacked_lstm.png" alt="基于LSTM的栈式循环神经网络结构示意图" width="400"><br>
图3. 基于LSTM的栈式循环神经网络结构示意图
</p><div class="md-section-divider"></div><h3 data-anchor-id="y4pb" id="双向循环神经网络bidirectional-recurrent-neural-network">双向循环神经网络(Bidirectional Recurrent Neural Network)</h3><p data-anchor-id="6cxf">在LSTM中,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-6-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-6">t</script>时刻的隐藏层向量编码了到<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-7-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-7">t</script>时刻为止所有输入的信息,但<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-8-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" 
id="MathJax-Element-8">t</script>时刻的LSTM可以看到历史,却无法看到未来。在绝大多数自然语言处理任务中,我们几乎总是能拿到整个句子。这种情况下,如果能够像获取历史信息一样,得到未来的信息,对序列学习任务会有很大的帮助。</p><p data-anchor-id="rupi">为了克服这一缺陷,我们可以设计一种双向循环网络单元,它的思想简单且直接:对上一节的栈式循环神经网络进行一个小小的修改,堆叠多个LSTM单元,让每一层LSTM单元分别以:正向、反向、正向 …… 的顺序学习上一层的输出序列。于是,从第2层开始,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-9-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-9">t</script>时刻我们的LSTM单元便总是可以看到历史和未来的信息。图4是基于LSTM的双向循环神经网络结构示意图。</p><p align="center" data-anchor-id="85x1">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/label_semantic_roles/image/bidirectional_stacked_lstm.png" alt="基于LSTM的双向循环神经网络结构示意图" width="500"><br>
图4. 基于LSTM的双向循环神经网络结构示意图
</p><p data-anchor-id="yond">需要说明的是,这种双向RNN结构和Bengio等人在机器翻译任务中使用的双向RNN结构[<a href="#参考文献">3</a>, <a href="#参考文献">4</a>] 并不相同,我们会在后续<a href="https://github.com/PaddlePaddle/book/blob/develop/machine_translation/README.md" target="_blank">机器翻译</a>任务中,介绍另一种双向循环神经网络。</p><div class="md-section-divider"></div><h3 data-anchor-id="bygv" id="条件随机场-conditional-random-field">条件随机场 (Conditional Random Field)</h3><p data-anchor-id="qybp">使用神经网络模型解决问题的思路通常是:前层网络学习输入的特征表示,网络的最后一层在特征基础上完成最终的任务。在SRL任务中,深层LSTM网络学习输入的特征表示,条件随机场(Conditional Random Field, CRF)在特征的基础上完成序列标注,处于整个网络的末端。</p><p data-anchor-id="nhmk">CRF是一种概率化结构模型,可以看作是一个概率无向图模型,结点表示随机变量,边表示随机变量之间的概率依赖关系。简单来讲,CRF学习条件概率<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-10-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 3425 1042.103370696049" style="width: 7.992ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="1141" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1904" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="2183" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="3035" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-10">P(Y|X)</script>,其中 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-11-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 
8786.487065625814 1042.103370696049" style="width: 20.386ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1130" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2186" y="0"></use><g transform="translate(2576,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3602" y="0"></use><g transform="translate(4047,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="5074" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5519" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5964" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="6409" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6854" y="0"></use><g transform="translate(7299,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="8396" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-11">X = (x_1, x_2, ... 
, x_n)</script> 是输入序列,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-12-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 8451.487065625814 1042.103370696049" style="width: 19.575ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1041" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2097" y="0"></use><g transform="translate(2487,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3431" y="0"></use><g transform="translate(3876,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4821" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5266" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5711" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="6156" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6601" y="0"></use><g transform="translate(7046,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-6E" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="8061" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-12">Y = (y_1, y_2, ... , y_n)</script> 是标记序列;解码过程是给定 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-13-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-13">X</script>序列求解令<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-14-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 3425 1042.103370696049" style="width: 7.992ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="1141" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1904" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="2183" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="3035" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-14">P(Y|X)</script>最大的<span class="MathJax_Preview"></span><span class="MathJax_SVG" 
id="MathJax-Element-15-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 763.5 724.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use></g></svg></span><script type="math/tex" id="MathJax-Element-15">Y</script>序列,即<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-16-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 10177.488526975352 1042.103370696049" style="width: 23.629ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2217" x="1157" y="513"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1549" y="0"></use><g transform="translate(2606,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-61"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-72" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-67" x="893" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-20" x="1393" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6D" x="1644" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-61" x="2477" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-78" x="2978" y="0"></use><use 
transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="4958" y="-343"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50" x="6752" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="7503" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="7893" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="8656" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="8935" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="9787" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-16">Y^* = \mbox{arg max}_{Y} P(Y | X)</script></p><p data-anchor-id="c62u">序列标注任务只需要考虑输入和输出都是一个线性序列,并且由于我们只是将输入序列作为条件,不做任何条件独立假设,因此输入序列的元素之间并不存在图结构。综上,在序列标注任务中使用的是如图5所示的定义在链式图上的CRF,称之为线性链条件随机场(Linear Chain Conditional Random Field)。</p><p align="center" data-anchor-id="9is4">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/label_semantic_roles/image/linear_chain_crf.png" alt="序列标注任务中使用的线性链条件随机场" width="400"><br>
图5. 序列标注任务中使用的线性链条件随机场
</p><p data-anchor-id="r8qo">根据线性链条件随机场上的因子分解定理[<a href="#参考文献">5</a>],在给定观测序列<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-17-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-17">X</script>时,一个特定标记序列<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-18-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 763.5 724.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use></g></svg></span><script type="math/tex" id="MathJax-Element-18">Y</script>的概率可以定义为:</p><div class="md-section-divider"></div><p data-anchor-id="tdcj"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-19-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="-38.5 -1850.7792646367611 31680.014552427256 3201.5585292735223" style="width: 73.552ex; height: 7.413ex; vertical-align: -3.243ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-70"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="503" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="893" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1656" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="1935" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="2787" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="3454" y="0"></use><g transform="translate(4631,0)"><rect stroke="none" width="2475" height="60" x="0" y="220"></rect><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="987" y="676"></use><g transform="translate(60,-716)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-5A"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="723" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="1113" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1965" y="0"></use></g></g><g transform="translate(7392,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-78" x="444" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-70" x="973" y="0"></use></g><g transform="translate(8922,0)"><g transform="translate(0,1830)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-239B" x="0" y="-1156"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-239D" x="0" y="-2517"></use></g><g transform="translate(875,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><g transform="translate(147,-1090)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use 
transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1124" y="0"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="721" y="1627"></use></g><g transform="translate(2320,0)"><g transform="translate(0,1830)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-239B" x="0" y="-1156"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-239D" x="0" y="-2517"></use></g><g transform="translate(875,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="815" y="-1536"></use></g><g transform="translate(2486,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3BB"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="825" y="-213"></use></g><g transform="translate(3461,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="511" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="4215" y="0"></use><g transform="translate(4604,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><g transform="translate(490,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-31" x="1124" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6343" y="0"></use><g transform="translate(6788,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="7623" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="8068" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="8921" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="9366" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="9712" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="10323" y="0"></use><g transform="translate(11324,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="760" y="-1569"></use></g><g transform="translate(12935,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3BC"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="853" y="-213"></use></g><g transform="translate(14007,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="663" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="14946" y="0"></use><g transform="translate(15335,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="16170" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="16615" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="17468" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="17913" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="18258" y="0"></use><g transform="translate(18648,1830)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-239E" x="0" y="-1155"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-23A0" x="0" y="-2517"></use></g></g><g transform="translate(21843,1830)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-239E" x="0" y="-1155"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-23A0" x="0" y="-2517"></use></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-19">p(Y | X) = \frac{1}{Z(X)} \text{exp}\left(\sum_{i=1}^{n}\left(\sum_{j}\lambda_{j}t_{j} (y_{i - 1}, y_{i}, X, i) + \sum_{k} \mu_k s_k (y_i, X, i)\right)\right)</script></p><p data-anchor-id="ned9">其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-20-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 2355 1042.103370696049" style="width: 5.444ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-5A"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="723" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-58" x="1113" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1965" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-20">Z(X)</script>是归一化因子,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-21-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 753.1815472394509 962.0602608392912" style="width: 1.737ex; height: 2.201ex; vertical-align: -0.811ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="511" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-21">t_j</script> 是定义在边上的特征函数,依赖于当前和前一个位置,称为转移特征,表示对于输入序列<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-22-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-22">X</script>及其标注序列在 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-23-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" 
fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-23">i</script>及<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-24-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 2068.9444444444443 719.103370696049" style="width: 4.749ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="567" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1568" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-24">i - 1</script>位置上标记的转移概率。<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-25-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 938.2561863887845 642.5886520702876" style="width: 2.201ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="663" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-25">s_k</script>是定义在结点上的特征函数,称为状态特征,依赖于当前位置,表示对于观察序列<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-26-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-26">X</script>及其<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-27-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-27">i</script>位置的标记概率。<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-28-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 975.1815472394509 1030.0602608392912" style="width: 2.317ex; height: 2.432ex; vertical-align: -0.811ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3BB"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="825" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-28">\lambda_j</script>和<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-29-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 1072.2561863887845 700.103370696049" style="width: 2.548ex; height: 1.622ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3BC"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="853" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-29">\mu_k</script> 分别是转移特征函数和状态特征函数对应的权值。实际上,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-30-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-30">t</script>和<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-31-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 469.5 495.10337069604896" style="width: 1.042ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73"></use></g></svg></span><script type="math/tex" id="MathJax-Element-31">s</script>可以用相同的数学形式表示,再对转移特征和状态特征在各个位置<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-32-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: 
inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-32">i</script>求和有:<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-33-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -811.9875111244761 14451.096586641503 1126.8798169060667" style="width: 33.591ex; height: 2.664ex; vertical-align: -0.811ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="693" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="959" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="1348" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2112" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="2557" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="3409" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="4077" y="0"></use><g transform="translate(5133,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ1-2211"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1494" y="675"></use><g transform="translate(1056,-287)"><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1124" y="0"></use></g></g><g transform="translate(7605,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="8564" y="0"></use><g transform="translate(8954,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><g transform="translate(490,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1124" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1739" y="0"></use><g transform="translate(2184,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3019" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="3464" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4316" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="4762" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-29" x="14061" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-33">f_{k}(Y, X) = \sum_{i=1}^{n}f_k({y_{i - 1}, y_i, X, i})</script>,把<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-34-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 550.5 952.103370696049" style="width: 1.274ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use></g></svg></span><script type="math/tex" id="MathJax-Element-34">f</script>统称为特征函数,于是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-35-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 3425 1042.103370696049" style="width: 7.992ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="1141" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1904" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="2183" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="3035" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-35">P(Y|X)</script>可表示为:</p><div class="md-section-divider"></div><p data-anchor-id="55mz"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" 
role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-36-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="-38.5 -1363.5596853480245 17122.901261666455 2601.2018649949378" style="width: 39.73ex; height: 6.023ex; vertical-align: -3.012ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-70"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="503" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="893" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1656" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="1935" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2787" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57" x="3232" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="4281" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="4948" y="0"></use><g transform="translate(6124,0)"><rect stroke="none" width="2475" height="60" x="0" y="220"></rect><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="987" y="676"></use><g transform="translate(60,-716)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-5A"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="723" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="1113" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1965" y="0"></use></g></g><g transform="translate(8886,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-65"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-78" x="444" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-70" x="973" y="0"></use></g><g transform="translate(10582,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="760" y="-1569"></use></g><g transform="translate(12193,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="880" y="-213"></use></g><g transform="translate(13284,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="14244" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="14633" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="15397" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="15842" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="16694" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-36">p(Y|X, W) = \frac{1}{Z(X)}\text{exp}\sum_{k}\omega_{k}f_{k}(Y, X)</script></p><p data-anchor-id="j1lk"><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-37-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 622.5 497.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" 
stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use></g></svg></span><script type="math/tex" id="MathJax-Element-37">\omega</script>是特征函数对应的权值,是CRF模型要学习的参数。训练时,对于给定的输入序列和对应的标记序列集合<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-38-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 16565.212081659516 1042.103370696049" style="width: 38.456ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-44"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1106" y="0"></use><g transform="translate(2162,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="278" y="0"></use><g transform="translate(668,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1171" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1950" y="0"></use><g transform="translate(2395,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="822" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="3430" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3820" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="4265" y="0"></use><g transform="translate(4655,0)"><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1171" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="5937" y="0"></use><g transform="translate(6382,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="822" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="7418" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="7807" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="8252" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="8697" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="9143" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="9588" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="10033" y="0"></use><g transform="translate(10422,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E" x="1171" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="11979" y="0"></use><g transform="translate(12424,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E" x="822" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="13734" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="14124" 
y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-38">D = \left[(X_1, Y_1), (X_2 , Y_2) , ... , (X_N, Y_N)\right]</script> ,通过正则化的极大似然估计,求解如下优化目标:</p><div class="md-section-divider"></div><p data-anchor-id="8cxb"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-39-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1771.0516853480244 21574.709772271945 3042.103370696049" style="width: 50.154ex; height: 7.066ex; vertical-align: -3.012ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="681" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3BB" x="1071" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1654" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-44" x="2099" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="2928" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="3595" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="4651" y="0"></use><g transform="translate(5430,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-6F" x="278" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-67" x="779" y="0"></use></g><g transform="translate(6709,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-28"></use><g transform="translate(792,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJSZ2-220F" x="123" y="0"></use><g transform="translate(0,-1090)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="878" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1657" y="0"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E" x="634" y="1627"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-70" x="2484" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2988" y="0"></use><g transform="translate(3377,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="822" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="4680" y="0"></use><g transform="translate(4958,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="1171" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6508" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57" x="6953" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="8002" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ4-29" x="8391" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="16116" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-43" x="17116" y="0"></use><g transform="translate(18164,0)"><rect stroke="none" 
width="620" height="60" x="0" y="220"></rect><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="60" y="676"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="60" y="-686"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2225" x="19071" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57" x="19571" y="0"></use><g transform="translate(20620,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2225"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="707" y="583"></use></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-39">L(\lambda, D) = - \text{log}\left(\prod_{m=1}^{N}p(Y_m|X_m, W)\right) + C \frac{1}{2}\lVert W\rVert^{2}</script></p><p data-anchor-id="n3ra">这个优化目标可以通过反向传播算法和整个神经网络一起求解。解码时,对于给定的输入序列<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-40-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-40">X</script>,通过解码算法(通常有:维特比算法、Beam Search)求出令条件概率<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-41-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -826.8583648847041 3453.675 1097.9100502327285" style="width: 7.992ex; height: 2.548ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" 
transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-AF" x="279" y="215"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="780" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59" x="1169" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1933" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="2211" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="3064" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-41">\bar{P}(Y|X)</script>最大的输出序列 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-42-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -826.8583648847041 763.5 846.9100502327285" style="width: 1.737ex; height: 1.969ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-AF" x="249" y="215"></use></g></svg></span><script type="math/tex" id="MathJax-Element-42">\bar{Y}</script></p><div class="md-section-divider"></div><h3 data-anchor-id="14xq" id="深度双向lstmdb-lstmsrl模型">深度双向LSTM(DB-LSTM)SRL模型</h3><p data-anchor-id="ilbe">在SRL任务中,输入是 “谓词” 和 “一句话”,目标是从这句话中找到谓词的论元,并标注论元的语义角色。如果一个句子含有<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-43-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; 
vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-43">n</script>个谓词,这个句子会被处理<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-44-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-44">n</script>次。一个最为直接的模型是下面这样:</p><ol data-anchor-id="gcqi">
<li>构造输入; <br>
<ul><li>输入1是谓词,输入2是句子</li>
<li>将输入1扩展成和输入2一样长的序列,用one-hot方式表示;</li></ul></li>
<li>one-hot方式的谓词序列和句子序列通过词表,转换为实向量表示的词向量序列;</li>
<li>将步骤2中的2个词向量序列作为双向LSTM的输入,学习输入序列的特征表示;</li>
<li>CRF以步骤3中模型学习到的特征为输入,以标记序列为监督信号,实现序列标注;</li>
</ol><p data-anchor-id="u97f">大家可以尝试上面这种方法。这里,我们提出一些改进,引入两个简单但对提高系统性能非常有效的特征:</p><ul data-anchor-id="gay3">
<li>谓词上下文:上面的方法中,只用到了谓词的词向量表达谓词相关的所有信息,这种方法始终是非常弱的,特别是如果谓词在句子中出现多次,有可能引起一定的歧义。从经验出发,谓词前后若干个词的一个小片段,能够提供更丰富的信息,帮助消解歧义。于是,我们把这样的经验也添加到模型中,为每个谓词同时抽取一个“谓词上下文” 片段,也就是从这个谓词前后各取<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-45-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-45">n</script>个词构成的一个窗口片段;</li>
<li>谓词上下文区域标记:为句子中的每一个词引入一个0-1二值变量,表示它们是否在“谓词上下文”片段中;</li>
</ul><p data-anchor-id="1zhr">修改后的模型如下(图6是一个深度为4的模型结构示意图):</p><ol data-anchor-id="1dhg">
<li>构造输入 <br>
<ul><li>输入1是句子序列,输入2是谓词序列,输入3是谓词上下文,从句子中抽取这个谓词前后各<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-46-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-46">n</script>个词,构成谓词上下文,用one-hot方式表示,输入4是谓词上下文区域标记,标记了句子中每一个词是否在谓词上下文中;</li>
<li>将输入2~3均扩展为和输入1一样长的序列;</li></ul></li>
<li>输入1~4均通过词表取词向量转换为实向量表示的词向量序列;其中输入1、3共享同一个词表,输入2和4各自独有词表;</li>
<li>第2步的4个词向量序列作为双向LSTM模型的输入;LSTM模型学习输入序列的特征表示,得到新的特征表示序列;</li>
<li>CRF以第3步中LSTM学习到的特征为输入,以标记序列为监督信号,完成序列标注;</li>
</ol><p align="center" data-anchor-id="ued5">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/label_semantic_roles/image/db_lstm_network.png" alt="SRL任务上的深层双向LSTM模型结构示意图" width="500"><br>
图6. SRL任务上的深层双向LSTM模型
</p><div class="md-section-divider"></div><h2 data-anchor-id="phrn" id="数据准备">数据准备</h2><div class="md-section-divider"></div><h3 data-anchor-id="pgof" id="数据介绍与下载">数据介绍与下载</h3><p data-anchor-id="6teo">在此教程中,我们选用<a href="http://www.cs.upc.edu/~srlconll/" target="_blank">CoNLL 2005</a>SRL任务开放出的数据集作为示例。运行 <code>sh ./get_data.sh</code> 会自动从官方网站上下载原始数据。需要特别说明的是,CoNLL 2005 SRL任务的训练数集和开发集在比赛之后并非免费进行公开,目前,能够获取到的只有测试集,包括Wall Street Journal的23节和Brown语料集中的3节。在本教程中,我们以测试集中的WSJ数据为训练集来讲解模型。但是,由于测试集中样本的数量远远不够,如果希望训练一个可用的神经网络SRL系统,请考虑付费获取全量数据。</p><p data-anchor-id="6usr">原始数据中同时包括了词性标注、命名实体识别、语法解析树等多种信息。本教程中,我们使用test.wsj文件夹中的数据进行训练和测试,并只会用到words文件夹(文本序列)和props文件夹(标注结果)下的数据。本教程使用的数据目录如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="421q"><ol class="linenums"><li class="L0"><code class="language-text"><span class="pln">conll05st</span><span class="pun">-</span><span class="pln">release</span><span class="pun">/</span></code></li><li class="L1"><code class="language-text"><span class="pun">└──</span><span class="pln"> test</span><span class="pun">.</span><span class="pln">wsj</span></code></li><li class="L2"><code class="language-text"><span class="pln"> </span><span class="pun">├──</span><span class="pln"> props </span><span class="com"># 标注结果</span></code></li><li class="L3"><code class="language-text"><span class="pln"> </span><span class="pun">└──</span><span class="pln"> words </span><span class="com"># 输入文本序列</span></code></li></ol></pre><p data-anchor-id="cmqs">标注信息源自Penn TreeBank[<a href="#参考文献">7</a>]和PropBank[<a href="#参考文献">8</a>]的标注结果。PropBank标注结果的标签和我们在文章一开始示例中使用的标注结果标签不同,但原理是相同的,关于标注结果标签含义的说明,请参考论文[<a href="#参考文献">9</a>]。</p><p data-anchor-id="7om6">除数据之外,<code>get_data.sh</code>同时下载了以下资源:</p><table data-anchor-id="fycd" class="table table-striped-white table-bordered">
<thead>
<tr>
<th>文件名称</th>
<th>说明</th>
</tr>
</thead>
<tbody><tr>
<td>word_dict</td>
<td>输入句子的词典,共计44068个词</td>
</tr>
<tr>
<td>label_dict</td>
<td>标记的词典,共计106个标记</td>
</tr>
<tr>
<td>predicate_dict</td>
<td>谓词的词典,共计3162个词</td>
</tr>
<tr>
<td>emb</td>
<td>一个训练好的词表,32维</td>
</tr>
</tbody></table><p data-anchor-id="3og1">我们在英文维基百科上训练语言模型得到了一份词向量用来初始化SRL模型。在SRL模型训练过程中,词向量不再被更新。关于语言模型和词向量可以参考<a href="https://github.com/PaddlePaddle/book/blob/develop/word2vec/README.md" target="_blank">词向量</a> 这篇教程。我们训练语言模型的语料共有995,000,000个token,词典大小控制为4,900,000词。CoNLL 2005训练语料中有5%的词不在这4,900,000个词中,我们将它们全部看作未登录词,用<code>&lt;unk&gt;</code>表示。</p><div class="md-section-divider"></div><h3 data-anchor-id="byyx" id="数据预处理">数据预处理</h3><p data-anchor-id="usf0">脚本在下载数据之后,又调用了<code>extract_pair.py</code>和<code>extract_dict_feature.py</code>两个子脚本进行数据预处理,前者完成了下面的第1步,后者完成了下面的2~4步:</p><ol data-anchor-id="hsaq">
<li>将文本序列和标记序列合并到一条记录中;</li>
<li>一个句子如果含有<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-47-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-47">n</script>个谓词,这个句子会被处理<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-48-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-48">n</script>次,变成<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-49-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-49">n</script>条独立的训练样本,每个样本一个不同的谓词;</li>
<li>抽取谓词上下文和构造谓词上下文区域标记;</li>
<li>构造以BIO法表示的标记;</li>
</ol><p data-anchor-id="bgzy"><code>data/feature</code>文件是处理好的模型输入,一行是一条训练样本,以"\t"分隔,共9列,分别是:句子序列、谓词、谓词上下文(占 5 列)、谓词上下文区域标记、标注序列。下表是一条训练样本的示例。</p><table data-anchor-id="k68g" class="table table-striped-white table-bordered">
<thead>
<tr>
<th>句子序列</th>
<th>谓词</th>
<th>谓词上下文(窗口 = 5)</th>
<th>谓词上下文区域标记</th>
<th>标注序列</th>
</tr>
</thead>
<tbody><tr>
<td>A</td>
<td>set</td>
<td>n't been set . ×</td>
<td>0</td>
<td>B-A1</td>
</tr>
<tr>
<td>record</td>
<td>set</td>
<td>n't been set . ×</td>
<td>0</td>
<td>I-A1</td>
</tr>
<tr>
<td>date</td>
<td>set</td>
<td>n't been set . ×</td>
<td>0</td>
<td>I-A1</td>
</tr>
<tr>
<td>has</td>
<td>set</td>
<td>n't been set . ×</td>
<td>0</td>
<td>O</td>
</tr>
<tr>
<td>n't</td>
<td>set</td>
<td>n't been set . ×</td>
<td>1</td>
<td>B-AM-NEG</td>
</tr>
<tr>
<td>been</td>
<td>set</td>
<td>n't been set . ×</td>
<td>1</td>
<td>O</td>
</tr>
<tr>
<td>set</td>
<td>set</td>
<td>n't been set . ×</td>
<td>1</td>
<td>B-V</td>
</tr>
<tr>
<td>.</td>
<td>set</td>
<td>n't been set . ×</td>
<td>1</td>
<td>O</td>
</tr>
</tbody></table><div class="md-section-divider"></div><h3 data-anchor-id="u6r3" id="提供数据给-paddlepaddle">提供数据给 PaddlePaddle</h3><ol data-anchor-id="d87o">
<li><p>使用hook函数进行PaddlePaddle输入字段的格式定义。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> hook</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> word_dict</span><span class="pun">,</span><span class="pln"> label_dict</span><span class="pun">,</span><span class="pln"> predicate_dict</span><span class="pun">,</span><span class="pln"> </span><span class="pun">**</span><span class="pln">kwargs</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">word_dict </span><span class="pun">=</span><span class="pln"> word_dict </span><span class="com"># 获取句子序列的字典</span></code></li><li class="L2"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">label_dict </span><span class="pun">=</span><span class="pln"> label_dict </span><span class="com"># 获取标记序列的字典</span></code></li><li class="L3"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">predicate_dict </span><span class="pun">=</span><span class="pln"> predicate_dict </span><span class="com"># 获取谓词的字典</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="com"># 所有输入特征都是使用one-hot表示序列,在PaddlePaddle中是interger_value_sequence类型</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="com"># input_types是一个字典,字典中每个元素对应着配置中的一个data_layer,key恰好就是data_layer的名字</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">input_types </span><span class="pun">=</span><span class="pln"> </span><span 
class="pun">{</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="str">'word_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">word_dict</span><span class="pun">)),</span><span class="pln"> </span><span class="com"># 句子序列</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_n2_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">word_dict</span><span class="pun">)),</span><span class="pln"> </span><span class="com"># 谓词上下文中的第1个词</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_n1_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">word_dict</span><span class="pun">)),</span><span class="pln"> </span><span class="com"># 谓词上下文中的第2个词</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_0_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">word_dict</span><span class="pun">)),</span><span class="pln"> </span><span class="com"># 谓词上下文中的第3个词</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_p1_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">word_dict</span><span class="pun">)),</span><span class="pln"> </span><span class="com"># 
谓词上下文中的第4个词</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_p2_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">word_dict</span><span class="pun">)),</span><span class="pln"> </span><span class="com"># 谓词上下文中的第5个词</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="str">'verb_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">predicate_dict</span><span class="pun">)),</span><span class="pln"> </span><span class="com"># 谓词</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="str">'mark_data'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="lit">2</span><span class="pun">),</span><span class="pln"> </span><span class="com"># 谓词上下文区域标记</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="str">'target'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">label_dict</span><span class="pun">))</span><span class="pln"> </span><span class="com"># 标记序列</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="pun">}</span></code></li></ol></pre></li>
<li><p>使用process将数据逐一提供给PaddlePaddle,只需要考虑如何从原始数据文件中返回一条训练样本。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> process</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> file_name</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">file_name</span><span class="pun">,</span><span class="pln"> </span><span class="str">'r'</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> fdata</span><span class="pun">:</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> line </span><span class="kwd">in</span><span class="pln"> fdata</span><span class="pun">:</span></code></li><li class="L3"><code class="language-python"><span class="pln"> sentence</span><span class="pun">,</span><span class="pln"> predicate</span><span class="pun">,</span><span class="pln"> ctx_n2</span><span class="pun">,</span><span class="pln"> ctx_n1</span><span class="pun">,</span><span class="pln"> ctx_0</span><span class="pun">,</span><span class="pln"> ctx_p1</span><span class="pun">,</span><span class="pln"> ctx_p2</span><span class="pun">,</span><span class="pln"> mark</span><span class="pun">,</span><span class="pln"> label </span><span class="pun">=</span><span class="pln"> \</span></code></li><li class="L4"><code class="language-python"><span class="pln"> line</span><span class="pun">.</span><span class="pln">strip</span><span class="pun">().</span><span class="pln">split</span><span class="pun">(</span><span class="str">'\t'</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln"> 
</span><span class="com"># 句子文本</span></code></li><li class="L7"><code class="language-python"><span class="pln"> words </span><span class="pun">=</span><span class="pln"> sentence</span><span class="pun">.</span><span class="pln">split</span><span class="pun">()</span></code></li><li class="L8"><code class="language-python"><span class="pln"> sen_len </span><span class="pun">=</span><span class="pln"> len</span><span class="pun">(</span><span class="pln">words</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln"> word_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">w</span><span class="pun">,</span><span class="pln"> UNK_IDX</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> w </span><span class="kwd">in</span><span class="pln"> words</span><span class="pun">]</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 一个谓词,这里将谓词扩展成一个和句子一样长的序列</span></code></li><li class="L2"><code class="language-python"><span class="pln"> predicate_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">predicate_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">predicate</span><span class="pun">)]</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> sen_len</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="com"># 在教程中,我们使用一个窗口为 5 
的谓词上下文窗口:谓词和这个谓词前后隔两个词</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="com"># 这里会将窗口中的每一个词,扩展成和输入句子一样长的序列</span></code></li><li class="L6"><code class="language-python"><span class="pln"> ctx_n2_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">ctx_n2</span><span class="pun">,</span><span class="pln"> UNK_IDX</span><span class="pun">)]</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> sen_len</span></code></li><li class="L7"><code class="language-python"><span class="pln"> ctx_n1_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">ctx_n1</span><span class="pun">,</span><span class="pln"> UNK_IDX</span><span class="pun">)]</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> sen_len</span></code></li><li class="L8"><code class="language-python"><span class="pln"> ctx_0_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">ctx_0</span><span class="pun">,</span><span class="pln"> UNK_IDX</span><span class="pun">)]</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> sen_len</span></code></li><li class="L9"><code class="language-python"><span class="pln"> ctx_p1_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span 
class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">ctx_p1</span><span class="pun">,</span><span class="pln"> UNK_IDX</span><span class="pun">)]</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> sen_len</span></code></li><li class="L0"><code class="language-python"><span class="pln"> ctx_p2_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">ctx_p2</span><span class="pun">,</span><span class="pln"> UNK_IDX</span><span class="pun">)]</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> sen_len</span></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="com"># 谓词上下文区域标记,是一个二值特征</span></code></li><li class="L3"><code class="language-python"><span class="pln"> marks </span><span class="pun">=</span><span class="pln"> mark</span><span class="pun">.</span><span class="pln">split</span><span class="pun">()</span></code></li><li class="L4"><code class="language-python"><span class="pln"> mark_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">int</span><span class="pun">(</span><span class="pln">w</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> w </span><span class="kwd">in</span><span class="pln"> marks</span><span class="pun">]</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln"> label_list </span><span class="pun">=</span><span class="pln"> label</span><span 
class="pun">.</span><span class="pln">split</span><span class="pun">()</span></code></li><li class="L7"><code class="language-python"><span class="pln"> label_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">label_dict</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">w</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> w </span><span class="kwd">in</span><span class="pln"> label_list</span><span class="pun">]</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="kwd">yield</span><span class="pln"> </span><span class="pun">{</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="str">'word_data'</span><span class="pun">:</span><span class="pln"> word_slot</span><span class="pun">,</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_n2_data'</span><span class="pun">:</span><span class="pln"> ctx_n2_slot</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_n1_data'</span><span class="pun">:</span><span class="pln"> ctx_n1_slot</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_0_data'</span><span class="pun">:</span><span class="pln"> ctx_0_slot</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_p1_data'</span><span class="pun">:</span><span class="pln"> ctx_p1_slot</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="str">'ctx_p2_data'</span><span 
class="pun">:</span><span class="pln"> ctx_p2_slot</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="str">'verb_data'</span><span class="pun">:</span><span class="pln"> predicate_slot</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="str">'mark_data'</span><span class="pun">:</span><span class="pln"> mark_slot</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="str">'target'</span><span class="pun">:</span><span class="pln"> label_slot</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="pun">}</span><span class="pln"> </span></code></li></ol></pre></li>
</ol><div class="md-section-divider"></div><h2 data-anchor-id="sy5y" id="模型配置说明">模型配置说明</h2><div class="md-section-divider"></div><h3 data-anchor-id="22j1" id="数据定义">数据定义</h3><p data-anchor-id="9w8p">首先通过 define_py_data_sources2 从dataprovider中读入数据。配置文件中会读取三个字典:输入文本序列的字典、标记的字典、谓词的字典,并传给data provider,data provider会利用这三个字典,将相应的文本输入转换成one-hot序列。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="7pq1"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">define_py_data_sources2</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> train_list</span><span class="pun">=</span><span class="pln">train_list_file</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> test_list</span><span class="pun">=</span><span class="pln">test_list_file</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> module</span><span class="pun">=</span><span class="str">'dataprovider'</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> obj</span><span class="pun">=</span><span class="str">'process'</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> args</span><span class="pun">={</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="str">'word_dict'</span><span class="pun">:</span><span class="pln"> word_dict</span><span class="pun">,</span><span class="pln"> </span><span class="com"># 输入文本序列的字典</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="str">'label_dict'</span><span class="pun">:</span><span class="pln"> label_dict</span><span class="pun">,</span><span class="pln"> </span><span class="com"># 标记的字典</span></code></li><li class="L8"><code 
class="language-python"><span class="pln"> </span><span class="str">'predicate_dict'</span><span class="pun">:</span><span class="pln"> predicate_dict </span><span class="com"># 谓词的词典</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="pun">}</span></code></li><li class="L0"><code class="language-python"><span class="pun">)</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="7i1q" id="算法配置">算法配置</h3><p data-anchor-id="vb6u">在这里,我们指定了模型的训练参数,选择了<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-50-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1135.406943983867 874.8104774772355" style="width: 2.664ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4C"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="963" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-50">L_2</script>正则、学习率和batch size,并使用带Momentum的随机梯度下降法作为优化算法。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="tsp4"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">settings</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> batch_size</span><span class="pun">=</span><span class="lit">150</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> learning_method</span><span class="pun">=</span><span class="typ">MomentumOptimizer</span><span class="pun">(</span><span class="pln">momentum</span><span class="pun">=</span><span 
class="lit">0</span><span class="pun">),</span></code></li><li class="L3"><code class="language-python"><span class="pln"> learning_rate</span><span class="pun">=</span><span class="lit">2e-2</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> regularization</span><span class="pun">=</span><span class="pln">L2Regularization</span><span class="pun">(</span><span class="lit">8e-4</span><span class="pun">),</span></code></li><li class="L5"><code class="language-python"><span class="pln"> model_average</span><span class="pun">=</span><span class="typ">ModelAverage</span><span class="pun">(</span><span class="pln">average_window</span><span class="pun">=</span><span class="lit">0.5</span><span class="pun">,</span><span class="pln"> max_average_window</span><span class="pun">=</span><span class="lit">10000</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"><span class="pun">)</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="19yq" id="模型结构">模型结构</h3><ol data-anchor-id="043w">
<li><p>定义输入数据维度及模型超参数。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">mark_dict_len </span><span class="pun">=</span><span class="pln"> </span><span class="lit">2</span><span class="pln"> </span><span class="com"># 谓上下文区域标志的维度,是一个0-1 2值特征,因此维度为2</span></code></li><li class="L1"><code class="language-python"><span class="pln">word_dim </span><span class="pun">=</span><span class="pln"> </span><span class="lit">32</span><span class="pln"> </span><span class="com"># 词向量维度</span></code></li><li class="L2"><code class="language-python"><span class="pln">mark_dim </span><span class="pun">=</span><span class="pln"> </span><span class="lit">5</span><span class="pln"> </span><span class="com"># 谓词上下文区域通过词表被映射为一个实向量,这个是相邻的维度</span></code></li><li class="L3"><code class="language-python"><span class="pln">hidden_dim </span><span class="pun">=</span><span class="pln"> </span><span class="lit">512</span><span class="pln"> </span><span class="com"># LSTM隐层向量的维度 : 512 / 4</span></code></li><li class="L4"><code class="language-python"><span class="pln">depth </span><span class="pun">=</span><span class="pln"> </span><span class="lit">8</span><span class="pln"> </span><span class="com"># 栈式LSTM的深度</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln">word </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'word_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">word_dict_len</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln">predicate </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span 
class="str">'verb_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">pred_len</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln">ctx_n2 </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'ctx_n2_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">word_dict_len</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln">ctx_n1 </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'ctx_n1_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">word_dict_len</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln">ctx_0 </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'ctx_0_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">word_dict_len</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="pln">ctx_p1 </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'ctx_p1_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">word_dict_len</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln">ctx_p2 </span><span 
class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'ctx_p2_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">word_dict_len</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="pln">mark </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'mark_data'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">mark_dict_len</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_predict</span><span class="pun">:</span></code></li><li class="L7"><code class="language-python"><span class="pln"> target </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'target'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">label_dict_len</span><span class="pun">)</span><span class="pln"> </span><span class="com"># 标记序列只在训练和测试流程中定义</span></code></li></ol></pre>
<p>这里需要特别说明的是hidden_dim = 512指定了LSTM隐层向量的维度为128维,关于这一点请参考PaddlePaddle官方文档中<a href="http://www.paddlepaddle.org/doc/ui/api/trainer_config_helpers/layers.html#lstmemory" target="_blank">lstmemory</a>的说明。</p></li>
<li><p>将句子序列、谓词、谓词上下文、谓词上下文区域标记通过词表,转换为实向量表示的词向量序列。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="com"># 在本教程中,我们加载了预训练的词向量,这里设置了:is_static=True</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="com"># is_static 为 True 时保证了在训练 SRL 模型过程中,词表不再更新</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="pln">emb_para </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParameterAttribute</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'emb'</span><span class="pun">,</span><span class="pln"> initial_std</span><span class="pun">=</span><span class="lit">0.</span><span class="pun">,</span><span class="pln"> is_static</span><span class="pun">=</span><span class="kwd">True</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln">word_input </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">word</span><span class="pun">,</span><span class="pln"> ctx_n2</span><span class="pun">,</span><span class="pln"> ctx_n1</span><span class="pun">,</span><span class="pln"> ctx_0</span><span class="pun">,</span><span class="pln"> ctx_p1</span><span class="pun">,</span><span class="pln"> ctx_p2</span><span class="pun">]</span></code></li><li class="L0"><code class="language-python"><span class="pln">emb_layers </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span></code></li><li class="L1"><code class="language-python"><span class="pln"> embedding_layer</span><span 
class="pun">(</span></code></li><li class="L2"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">word_dim</span><span class="pun">,</span><span class="pln"> input</span><span class="pun">=</span><span class="pln">x</span><span class="pun">,</span><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">emb_para</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> x </span><span class="kwd">in</span><span class="pln"> word_input</span></code></li><li class="L3"><code class="language-python"><span class="pun">]</span></code></li><li class="L4"><code class="language-python"><span class="pln">emb_layers</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">predicate_embedding</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln">mark_embedding </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span></code></li><li class="L6"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'word_ctx-in_embedding'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">mark_dim</span><span class="pun">,</span><span class="pln"> input</span><span class="pun">=</span><span class="pln">mark</span><span class="pun">,</span><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">std_0</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln">emb_layers</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">mark_embedding</span><span class="pun">)</span></code></li></ol></pre></li>
<li><p>8个LSTM单元以“正向/反向”的顺序对所有输入序列进行学习。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="com"># std_0 指定的参数以均值为0的高斯分布初始化,用在LSTM的bias初始化中 </span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">std_0 </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParameterAttribute</span><span class="pun">(</span><span class="pln">initial_std</span><span class="pun">=</span><span class="lit">0.</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="pln">hidden_0 </span><span class="pun">=</span><span class="pln"> mixed_layer</span><span class="pun">(</span></code></li><li class="L6"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'hidden0'</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">hidden_dim</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">std_default</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln"> input</span><span class="pun">=[</span></code></li><li class="L0"><code class="language-python"><span class="pln"> full_matrix_projection</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">emb</span><span class="pun">,</span><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">std_default</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> emb 
</span><span class="kwd">in</span><span class="pln"> emb_layers</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="pun">])</span></code></li><li class="L3"><code class="language-python"><span class="pln">lstm_0 </span><span class="pun">=</span><span class="pln"> lstmemory</span><span class="pun">(</span></code></li><li class="L4"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'lstm0'</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">hidden_0</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> act</span><span class="pun">=</span><span class="typ">ReluActivation</span><span class="pun">(),</span></code></li><li class="L7"><code class="language-python"><span class="pln"> gate_act</span><span class="pun">=</span><span class="typ">SigmoidActivation</span><span class="pun">(),</span></code></li><li class="L8"><code class="language-python"><span class="pln"> state_act</span><span class="pun">=</span><span class="typ">SigmoidActivation</span><span class="pun">(),</span></code></li><li class="L9"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">std_0</span><span class="pun">,</span></code></li><li class="L0"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">lstm_para_attr</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> input_tmp </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">hidden_0</span><span class="pun">,</span><span class="pln"> lstm_0</span><span class="pun">]</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code 
class="language-python"><span class="kwd">for</span><span class="pln"> i </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="lit">1</span><span class="pun">,</span><span class="pln"> depth</span><span class="pun">):</span></code></li><li class="L4"><code class="language-python"><span class="pln"> mix_hidden </span><span class="pun">=</span><span class="pln"> mixed_layer</span><span class="pun">(</span></code></li><li class="L5"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'hidden'</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> str</span><span class="pun">(</span><span class="pln">i</span><span class="pun">),</span></code></li><li class="L6"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">hidden_dim</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">std_default</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln"> input</span><span class="pun">=[</span></code></li><li class="L9"><code class="language-python"><span class="pln"> full_matrix_projection</span><span class="pun">(</span></code></li><li class="L0"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">input_tmp</span><span class="pun">[</span><span class="lit">0</span><span class="pun">],</span><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">hidden_para_attr</span><span class="pun">),</span></code></li><li class="L1"><code class="language-python"><span class="pln"> full_matrix_projection</span><span class="pun">(</span></code></li><li class="L2"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span 
class="pln">input_tmp</span><span class="pun">[</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">lstm_para_attr</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="pun">])</span></code></li><li class="L4"><code class="language-python"><span class="pln"> lstm </span><span class="pun">=</span><span class="pln"> lstmemory</span><span class="pun">(</span></code></li><li class="L5"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'lstm'</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> str</span><span class="pun">(</span><span class="pln">i</span><span class="pun">),</span></code></li><li class="L6"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">mix_hidden</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> act</span><span class="pun">=</span><span class="typ">ReluActivation</span><span class="pun">(),</span></code></li><li class="L8"><code class="language-python"><span class="pln"> gate_act</span><span class="pun">=</span><span class="typ">SigmoidActivation</span><span class="pun">(),</span></code></li><li class="L9"><code class="language-python"><span class="pln"> state_act</span><span class="pun">=</span><span class="typ">SigmoidActivation</span><span class="pun">(),</span></code></li><li class="L0"><code class="language-python"><span class="pln"> reverse</span><span class="pun">=((</span><span class="pln">i </span><span class="pun">%</span><span class="pln"> </span><span class="lit">2</span><span class="pun">)</span><span class="pln"> </span><span class="pun">==</span><span class="pln"> </span><span class="lit">1</span><span class="pun">),</span></code></li><li class="L1"><code class="language-python"><span 
class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">std_0</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">lstm_para_attr</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln"> input_tmp </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">mix_hidden</span><span class="pun">,</span><span class="pln"> lstm</span><span class="pun">]</span></code></li></ol></pre></li>
<li><p>取最后一个栈式LSTM的输出和这个LSTM单元的输入到隐层映射,经过一个全连接层映射到标记字典的维度,得到最终的特征向量表示。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">feature_out </span><span class="pun">=</span><span class="pln"> mixed_layer</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'output'</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">label_dict_len</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">std_default</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> input</span><span class="pun">=[</span></code></li><li class="L5"><code class="language-python"><span class="pln"> full_matrix_projection</span><span class="pun">(</span></code></li><li class="L6"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">input_tmp</span><span class="pun">[</span><span class="lit">0</span><span class="pun">],</span><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">hidden_para_attr</span><span class="pun">),</span></code></li><li class="L7"><code class="language-python"><span class="pln"> full_matrix_projection</span><span class="pun">(</span></code></li><li class="L8"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">input_tmp</span><span class="pun">[</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">lstm_para_attr</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="pun">],</span><span class="pln"> </span><span 
class="pun">)</span><span class="pln"> </span></code></li></ol></pre></li>
<li><p>CRF层在网络的末端,完成序列标注。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">crf_l </span><span class="pun">=</span><span class="pln"> crf_layer</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'crf'</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">label_dict_len</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">feature_out</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> label</span><span class="pun">=</span><span class="pln">target</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="typ">ParameterAttribute</span><span class="pun">(</span></code></li><li class="L6"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'crfw'</span><span class="pun">,</span><span class="pln"> initial_std</span><span class="pun">=</span><span class="pln">default_std</span><span class="pun">,</span><span class="pln"> learning_rate</span><span class="pun">=</span><span class="pln">mix_hidden_lr</span><span class="pun">))</span></code></li></ol></pre></li>
</ol><div class="md-section-divider"></div><h2 data-anchor-id="ynqw" id="训练模型">训练模型</h2><p data-anchor-id="uaau">执行<code>sh train.sh</code>进行模型的训练,其中通过<code>--num_passes</code>指定了总共需要训练200个pass。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="ltum"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">paddle train \</span></code></li><li class="L1"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">config</span><span class="pun">=./</span><span class="pln">db_lstm</span><span class="pun">.</span><span class="pln">py \</span></code></li><li class="L2"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">save_dir</span><span class="pun">=./</span><span class="pln">output \</span></code></li><li class="L3"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">trainer_count</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \</span></code></li><li class="L4"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">dot_period</span><span class="pun">=</span><span class="lit">500</span><span class="pln"> \</span></code></li><li class="L5"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">log_period</span><span class="pun">=</span><span class="lit">10</span><span class="pln"> \</span></code></li><li class="L6"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">num_passes</span><span class="pun">=</span><span class="lit">200</span><span class="pln"> \</span></code></li><li class="L7"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">use_gpu</span><span class="pun">=</span><span class="pln">false \</span></code></li><li class="L8"><code class="language-bash"><span 
class="pln"> </span><span class="pun">--</span><span class="pln">show_parameter_stats_period</span><span class="pun">=</span><span class="lit">10</span><span class="pln"> \</span></code></li><li class="L9"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">test_all_data_in_one_period</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \</span></code></li><li class="L0"><code class="language-bash"><span class="lit">2</span><span class="pun">&gt;&amp;</span><span class="lit">1</span><span class="pln"> </span><span class="pun">|</span><span class="pln"> tee </span><span class="str">'train.log'</span></code></li></ol></pre><p data-anchor-id="56he">训练日志示例如下。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="alxt"><ol class="linenums"><li class="L0"><code class="language-text"><span class="pln">I1224 </span><span class="lit">18</span><span class="pun">:</span><span class="lit">11</span><span class="pun">:</span><span class="lit">53.661479</span><span class="pln"> </span><span class="lit">1433</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">880</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">145305</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.11541</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">1.8645</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.607942</span><span class="pln"> </span><span 
class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.59322</span></code></li><li class="L1"><code class="language-text"><span class="pln">I1224 </span><span class="lit">18</span><span class="pun">:</span><span class="lit">11</span><span class="pun">:</span><span class="lit">55.254021</span><span class="pln"> </span><span class="lit">1433</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">885</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">146134</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.11408</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">1.88156</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.607299</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.494572</span></code></li><li class="L2"><code class="language-text"><span class="pln">I1224 </span><span class="lit">18</span><span class="pun">:</span><span class="lit">11</span><span class="pun">:</span><span class="lit">56.867604</span><span class="pln"> </span><span class="lit">1433</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span 
class="pun">=</span><span class="lit">890</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">146987</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.11277</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">1.88839</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.607203</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.590856</span></code></li><li class="L3"><code class="language-text"><span class="pln">I1224 </span><span class="lit">18</span><span class="pun">:</span><span class="lit">11</span><span class="pun">:</span><span class="lit">58.424069</span><span class="pln"> </span><span class="lit">1433</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">895</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">147793</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.11129</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">1.84247</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.607099</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span 
class="pun">=</span><span class="lit">0.588089</span></code></li><li class="L4"><code class="language-text"><span class="pln">I1224 </span><span class="lit">18</span><span class="pun">:</span><span class="lit">12</span><span class="pun">:</span><span class="lit">00.006893</span><span class="pln"> </span><span class="lit">1433</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">900</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">148611</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.11148</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">2.14526</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.607882</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.749389</span></code></li><li class="L5"><code class="language-text"><span class="pln">I1224 </span><span class="lit">18</span><span class="pun">:</span><span class="lit">12</span><span class="pun">:</span><span class="lit">00.164089</span><span class="pln"> </span><span class="lit">1433</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">181</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Pass</span><span class="pun">=</span><span class="lit">0</span><span class="pln"> </span><span class="typ">Batch</span><span 
class="pun">=</span><span class="lit">901</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">148647</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.11195</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> __sum_evaluator_0__</span><span class="pun">=</span><span class="lit">0.60793</span></code></li></ol></pre><p data-anchor-id="ieyd">经过150个 pass 后,得到平均 error 约为 0.0516055。</p><div class="md-section-divider"></div><h2 data-anchor-id="ve23" id="应用模型">应用模型</h2><p data-anchor-id="gb0e">训练<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-51-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 888.5 725.103370696049" style="width: 2.085ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-51">N</script>个pass后,会得到<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-52-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 888.5 725.103370696049" style="width: 2.085ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-52">N</script>个模型,我们需要从中选择一个最优模型进行预测。通常做法是在开发集上进行调参,并基于我们关心的某个性能指标选择最优模型。本教程的<code>predict.sh</code>脚本简单地选择了测试集上标记错误最少的那个pass(这里是pass-00100)用于预测。</p><p 
data-anchor-id="y3vb">预测时,我们需要将配置中的 <code>crf_layer</code> 删掉,替换为 <code>crf_decoding_layer</code>,如下所示:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="siiw"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">crf_dec_l </span><span class="pun">=</span><span class="pln"> crf_decoding_layer</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> name</span><span class="pun">=</span><span class="str">'crf_dec_l'</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">label_dict_len</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">feature_out</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="typ">ParameterAttribute</span><span class="pun">(</span><span class="pln">name</span><span class="pun">=</span><span class="str">'crfw'</span><span class="pun">))</span></code></li></ol></pre><p data-anchor-id="tr5o">运行<code>python predict.py</code>脚本,便可使用指定的模型进行预测。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="joiw"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python predict</span><span class="pun">.</span><span class="pln">py</span></code></li><li class="L1"><code class="language-bash"><span class="pln"> </span><span class="pun">-</span><span class="pln">c db_lstm</span><span class="pun">.</span><span class="pln">py </span><span class="com"># 指定配置文件</span></code></li><li class="L2"><code class="language-bash"><span class="pln"> </span><span class="pun">-</span><span class="pln">w output</span><span 
class="pun">/</span><span class="pln">pass</span><span class="pun">-</span><span class="lit">00100</span><span class="pln"> </span><span class="com"># 指定预测使用的模型所在的路径</span></code></li><li class="L3"><code class="language-bash"><span class="pln"> </span><span class="pun">-</span><span class="pln">l data</span><span class="pun">/</span><span class="pln">targetDict</span><span class="pun">.</span><span class="pln">txt </span><span class="com"># 指定标记的字典</span></code></li><li class="L4"><code class="language-bash"><span class="pln"> </span><span class="pun">-</span><span class="pln">p data</span><span class="pun">/</span><span class="pln">verbDict</span><span class="pun">.</span><span class="pln">txt </span><span class="com"># 指定谓词的词典</span></code></li><li class="L5"><code class="language-bash"><span class="pln"> </span><span class="pun">-</span><span class="pln">d data</span><span class="pun">/</span><span class="pln">wordDict</span><span class="pun">.</span><span class="pln">txt </span><span class="com"># 指定输入文本序列的字典</span></code></li><li class="L6"><code class="language-bash"><span class="pln"> </span><span class="pun">-</span><span class="pln">i data</span><span class="pun">/</span><span class="pln">feature </span><span class="com"># 指定输入数据的路径</span></code></li><li class="L7"><code class="language-bash"><span class="pln"> </span><span class="pun">-</span><span class="pln">o predict</span><span class="pun">.</span><span class="pln">res </span><span class="com"># 指定标记结果输出到文件的路径</span></code></li></ol></pre><p data-anchor-id="wmwz">预测结束后,在 <code>-o</code> 参数所指定的标记结果文件中,我们会得到如下格式的输出:每行是一条样本,以 “\t” 分隔的 2 列,第一列是输入文本,第二列是标记的结果。通过BIO标记可以直接得到论元的语义角色标签。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="40k3"><ol class="linenums"><li class="L0"><code class="language-text"><span class="typ">The</span><span class="pln"> interest</span><span class="pun">-</span><span class="pln">only securities were priced at </span><span 
class="lit">35</span><span class="pln"> </span><span class="lit">1</span><span class="pln">\/</span><span class="lit">2</span><span class="pln"> to </span><span class="kwd">yield</span><span class="pln"> </span><span class="lit">10.72</span><span class="pln"> </span><span class="pun">%</span><span class="pln"> </span><span class="pun">.</span><span class="pln"> B</span><span class="pun">-</span><span class="pln">A0 I</span><span class="pun">-</span><span class="pln">A0 I</span><span class="pun">-</span><span class="pln">A0 O O O O O O B</span><span class="pun">-</span><span class="pln">V B</span><span class="pun">-</span><span class="pln">A1 I</span><span class="pun">-</span><span class="pln">A1 O</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="y1o3" id="总结">总结</h2><p data-anchor-id="1fp2">语义角色标注是许多自然语言理解任务的重要中间步骤。这篇教程中我们以语义角色标注任务为例,介绍如何利用PaddlePaddle进行序列标注任务。教程中所介绍的模型来自我们发表的论文[<a href="#参考文献">10</a>]。由于 CoNLL 2005 SRL任务的训练数据目前并非完全开放,教程中只使用测试数据作为示例。在这个过程中,我们希望减少对其它自然语言处理工具的依赖,利用神经网络数据驱动、端到端学习的能力,得到一个和传统方法可比、甚至更好的模型。在论文中我们证实了这种可能性。关于模型更多的信息和讨论可以在论文中找到。</p><div class="md-section-divider"></div><h2 data-anchor-id="8njh" id="参考文献">参考文献</h2><ol data-anchor-id="k346">
<li>Sun W, Sui Z, Wang M, et al. <a href="http://www.aclweb.org/anthology/D09-1#page=1513" target="_blank">Chinese semantic role labeling with shallow parsing</a>[C]//Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing: Volume 3-Volume 3. Association for Computational Linguistics, 2009: 1475-1483.</li>
<li>Pascanu R, Gulcehre C, Cho K, et al. <a href="https://arxiv.org/abs/1312.6026" target="_blank">How to construct deep recurrent neural networks</a>[J]. arXiv preprint arXiv:1312.6026, 2013.</li>
<li>Cho K, Van Merriënboer B, Gulcehre C, et al. <a href="https://arxiv.org/abs/1406.1078" target="_blank">Learning phrase representations using RNN encoder-decoder for statistical machine translation</a>[J]. arXiv preprint arXiv:1406.1078, 2014.</li>
<li>Bahdanau D, Cho K, Bengio Y. <a href="https://arxiv.org/abs/1409.0473" target="_blank">Neural machine translation by jointly learning to align and translate</a>[J]. arXiv preprint arXiv:1409.0473, 2014.</li>
<li>Lafferty J, McCallum A, Pereira F. <a href="https://repository.upenn.edu/cis_papers/159/" target="_blank">Conditional random fields: Probabilistic models for segmenting and labeling sequence data</a>[C]//Proceedings of the eighteenth international conference on machine learning, ICML. 2001, 1: 282-289.</li>
<li>李航. 统计学习方法[M]. 北京: 清华大学出版社, 2012.</li>
<li>Marcus M P, Marcinkiewicz M A, Santorini B. <a href="http://repository.upenn.edu/cgi/viewcontent.cgi?article=1246&amp;context=cis_reports" target="_blank">Building a large annotated corpus of English: The Penn Treebank</a>[J]. Computational linguistics, 1993, 19(2): 313-330.</li>
<li>Palmer M, Gildea D, Kingsbury P. <a href="http://www.mitpressjournals.org/doi/pdfplus/10.1162/0891201053630264" target="_blank">The proposition bank: An annotated corpus of semantic roles</a>[J]. Computational linguistics, 2005, 31(1): 71-106.</li>
<li>Carreras X, Màrquez L. <a href="http://www.cs.upc.edu/~srlconll/st05/papers/intro.pdf" target="_blank">Introduction to the CoNLL-2005 shared task: Semantic role labeling</a>[C]//Proceedings of the Ninth Conference on Computational Natural Language Learning. Association for Computational Linguistics, 2005: 152-164.</li>
<li>Zhou J, Xu W. <a href="http://www.aclweb.org/anthology/P/P15/P15-1109.pdf" target="_blank">End-to-end learning of semantic role labeling using recurrent neural networks</a>[C]//Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015.</li>
</ol><p data-anchor-id="96b7"><br> <br>
<img src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="知识共享许可协议"></p><p data-anchor-id="s4l3">本教程由<a href="http://book.paddlepaddle.org" target="_blank">PaddlePaddle</a>创作,采用<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">知识共享 署名-非商业性使用-相同方式共享 4.0 国际 许可协议</a>进行许可。</p></div>
</body>
</html>
......@@ -531,6 +531,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
因为它太大了无法显示 source diff。你可以改为查看 blob。
......@@ -740,6 +740,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
......@@ -52,6 +52,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
<!DOCTYPE html>
<html class="theme theme-white" lang="zh-Hans">
<head>
<meta charset="utf-8">
<title>识别数字</title>
<link href="https://www.zybuluo.com/static/assets/template-theme-white.css" rel="stylesheet" media="screen">
<style type="text/css">
#wmd-preview h1 {
color: #0077bb; /* 将标题改为蓝色 */
}</style>
</head>
<body class="theme theme-white">
<div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMATHI-58" stroke-width="1" d="M42 0H40Q26 0 26 11Q26 15 29 27Q33 41 36 43T55 46Q141 49 190 98Q200 108 306 224T411 342Q302 620 297 625Q288 636 234 637H206Q200 643 200 645T202 664Q206 677 212 683H226Q260 681 347 681Q380 681 408 681T453 682T473 682Q490 682 490 671Q490 670 488 658Q484 643 481 640T465 637Q434 634 411 620L488 426L541 485Q646 598 646 610Q646 628 622 635Q617 635 609 637Q594 637 594 648Q594 650 596 664Q600 677 606 683H618Q619 683 643 683T697 681T738 680Q828 680 837 683H845Q852 676 852 672Q850 647 840 637H824Q790 636 763 628T722 611T698 593L687 584Q687 585 592 480L505 384Q505 383 536 304T601 142T638 56Q648 47 699 46Q734 46 734 37Q734 35 732 23Q728 7 725 4T711 1Q708 1 678 1T589 2Q528 2 496 2T461 1Q444 1 444 10Q444 11 446 25Q448 35 450 39T455 44T464 46T480 47T506 54Q523 62 523 64Q522 64 476 181L429 299Q241 95 236 84Q232 76 232 72Q232 53 261 47Q262 47 267 47T273 46Q276 46 277 46T280 45T283 42T284 35Q284 26 282 19Q279 6 276 4T261 1Q258 1 243 1T201 2T142 2Q64 2 42 0Z"></path><path id="MJMAIN-32" stroke-width="1" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path id="MJMAIN-38" stroke-width="1" d="M70 417T70 494T124 618T248 666Q319 666 374 624T429 515Q429 485 418 459T392 417T361 389T335 371T324 363L338 
354Q352 344 366 334T382 323Q457 264 457 174Q457 95 399 37T249 -22Q159 -22 101 29T43 155Q43 263 172 335L154 348Q133 361 127 368Q70 417 70 494ZM286 386L292 390Q298 394 301 396T311 403T323 413T334 425T345 438T355 454T364 471T369 491T371 513Q371 556 342 586T275 624Q268 625 242 625Q201 625 165 599T128 534Q128 511 141 492T167 463T217 431Q224 426 228 424L286 386ZM250 21Q308 21 350 55T392 137Q392 154 387 169T375 194T353 216T330 234T301 253T274 270Q260 279 244 289T218 306L210 311Q204 311 181 294T133 239T107 157Q107 98 150 60T250 21Z"></path><path id="MJMAIN-D7" stroke-width="1" d="M630 29Q630 9 609 9Q604 9 587 25T493 118L389 222L284 117Q178 13 175 11Q171 9 168 9Q160 9 154 15T147 29Q147 36 161 51T255 146L359 250L255 354Q174 435 161 449T147 471Q147 480 153 485T168 490Q173 490 175 489Q178 487 284 383L389 278L493 382Q570 459 587 475T609 491Q630 491 630 471Q630 464 620 453T522 355L418 250L522 145Q606 61 618 48T630 29Z"></path><path id="MJMAIN-37" stroke-width="1" d="M55 458Q56 460 72 567L88 674Q88 676 108 676H128V672Q128 662 143 655T195 646T364 644H485V605L417 512Q408 500 387 472T360 435T339 403T319 367T305 330T292 284T284 230T278 162T275 80Q275 66 275 52T274 28V19Q270 2 255 -10T221 -22Q210 -22 200 -19T179 0T168 40Q168 198 265 368Q285 400 349 489L395 552H302Q128 552 119 546Q113 543 108 522T98 479L95 458V455H55V458Z"></path><path id="MJMAIN-34" stroke-width="1" d="M462 0Q444 3 333 3Q217 3 199 0H190V46H221Q241 46 248 46T265 48T279 53T286 61Q287 63 287 115V165H28V211L179 442Q332 674 334 675Q336 677 355 677H373L379 671V211H471V165H379V114Q379 73 379 66T385 54Q393 47 442 46H471V0H462ZM293 211V545L74 212L183 211H293Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 
667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMATHI-78" stroke-width="1" d="M52 289Q59 331 106 386T222 442Q257 442 286 424T329 379Q371 442 430 442Q467 442 494 420T522 361Q522 332 508 314T481 292T458 288Q439 288 427 299T415 328Q415 374 465 391Q454 404 425 404Q412 404 406 402Q368 386 350 336Q290 115 290 78Q290 50 306 38T341 26Q378 26 414 59T463 140Q466 150 469 151T485 153H489Q504 153 504 145Q504 144 502 134Q486 77 440 33T333 -11Q263 -11 227 52Q186 -10 133 -10H127Q78 -10 57 16T35 71Q35 103 54 123T99 143Q142 143 142 101Q142 81 130 66T107 46T94 41L91 40Q91 39 97 36T113 29T132 26Q168 26 194 71Q203 87 217 139T245 247T261 313Q266 340 266 352Q266 380 251 392T217 404Q177 404 142 372T93 290Q91 281 88 280T72 278H58Q52 284 52 289Z"></path><path id="MJMAIN-30" stroke-width="1" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path id="MJMAIN-2C" stroke-width="1" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path id="MJMAIN-31" stroke-width="1" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path id="MJMAIN-2026" stroke-width="1" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60ZM525 60Q525 84 542 102T585 120Q609 120 627 104T646 61Q646 36 629 18T586 0T543 17T525 
60ZM972 60Q972 84 989 102T1032 120Q1056 120 1074 104T1093 61Q1093 36 1076 18T1033 0T990 17T972 60Z"></path><path id="MJMAIN-33" stroke-width="1" d="M127 463Q100 463 85 480T69 524Q69 579 117 622T233 665Q268 665 277 664Q351 652 390 611T430 522Q430 470 396 421T302 350L299 348Q299 347 308 345T337 336T375 315Q457 262 457 175Q457 96 395 37T238 -22Q158 -22 100 21T42 130Q42 158 60 175T105 193Q133 193 151 175T169 130Q169 119 166 110T159 94T148 82T136 74T126 70T118 67L114 66Q165 21 238 21Q293 21 321 74Q338 107 338 175V195Q338 290 274 322Q259 328 213 329L171 330L168 332Q166 335 166 348Q166 366 174 366Q202 366 232 371Q266 376 294 413T322 525V533Q322 590 287 612Q265 626 240 626Q208 626 181 615T143 592T132 580H135Q138 579 143 578T153 573T165 566T175 555T183 540T186 520Q186 498 172 481T127 463Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMATHI-59" stroke-width="1" d="M66 637Q54 637 49 637T39 638T32 641T30 647T33 664T42 682Q44 683 56 683Q104 680 165 680Q288 680 306 683H316Q322 677 322 674T320 656Q316 643 310 637H298Q242 637 242 624Q242 619 292 477T343 333L346 336Q350 340 358 349T379 373T411 410T454 461Q546 568 561 587T577 618Q577 634 545 637Q528 637 528 647Q528 649 530 661Q533 676 535 679T549 683Q551 683 578 682T657 680Q684 680 713 681T746 682Q763 682 763 673Q763 669 760 657T755 643Q753 637 734 637Q662 632 617 587Q608 578 477 424L348 273L322 169Q295 62 295 57Q295 46 363 46Q379 46 384 45T390 35Q390 33 388 23Q384 6 382 4T366 1Q361 1 324 1T232 2Q170 2 138 2T102 1Q84 1 84 9Q84 14 87 24Q88 27 89 30T90 35T91 39T93 42T96 44T101 45T107 45T116 46T129 46Q168 47 180 50T198 63Q201 68 227 171L252 274L129 623Q128 624 127 625T125 627T122 629T118 631T113 633T105 634T96 635T83 636T66 637Z"></path><path id="MJMATHI-79" 
stroke-width="1" d="M21 287Q21 301 36 335T84 406T158 442Q199 442 224 419T250 355Q248 336 247 334Q247 331 231 288T198 191T182 105Q182 62 196 45T238 27Q261 27 281 38T312 61T339 94Q339 95 344 114T358 173T377 247Q415 397 419 404Q432 431 462 431Q475 431 483 424T494 412T496 403Q496 390 447 193T391 -23Q363 -106 294 -155T156 -205Q111 -205 77 -183T43 -117Q43 -95 50 -80T69 -58T89 -48T106 -45Q150 -45 150 -87Q150 -107 138 -122T115 -142T102 -147L99 -148Q101 -153 118 -160T152 -167H160Q177 -167 186 -165Q219 -156 247 -127T290 -65T313 -9T321 21L315 17Q309 13 296 6T270 -6Q250 -11 231 -11Q185 -11 150 11T104 82Q103 89 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-39" stroke-width="1" d="M352 287Q304 211 232 211Q154 211 104 270T44 396Q42 412 42 436V444Q42 537 111 606Q171 666 243 666Q245 666 249 666T257 665H261Q273 665 286 663T323 651T370 619T413 560Q456 472 456 334Q456 194 396 97Q361 41 312 10T208 -22Q147 -22 108 7T68 93T121 149Q143 149 158 135T173 96Q173 78 164 65T148 49T135 44L131 43Q131 41 138 37T164 27T206 22H212Q272 22 313 86Q352 142 352 280V287ZM244 248Q292 248 321 297T351 430Q351 508 343 542Q341 552 337 562T323 588T293 615T246 625Q208 625 181 598Q160 576 154 546T147 441Q147 358 152 329T172 282Q197 248 244 248Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-4C" stroke-width="1" d="M228 637Q194 637 192 641Q191 
643 191 649Q191 673 202 682Q204 683 217 683Q271 680 344 680Q485 680 506 683H518Q524 677 524 674T522 656Q517 641 513 637H475Q406 636 394 628Q387 624 380 600T313 336Q297 271 279 198T252 88L243 52Q243 48 252 48T311 46H328Q360 46 379 47T428 54T478 72T522 106T564 161Q580 191 594 228T611 270Q616 273 628 273H641Q647 264 647 262T627 203T583 83T557 9Q555 4 553 3T537 0T494 -1Q483 -1 418 -1T294 0H116Q32 0 32 10Q32 17 34 24Q39 43 44 45Q48 46 59 46H65Q92 46 125 49Q139 52 144 61Q147 65 216 339T285 628Q285 635 228 637Z"></path><path id="MJMATHI-6C" stroke-width="1" d="M117 59Q117 26 142 26Q179 26 205 131Q211 151 215 152Q217 153 225 153H229Q238 153 241 153T246 151T248 144Q247 138 245 128T234 90T214 43T183 6T137 -11Q101 -11 70 11T38 85Q38 97 39 102L104 360Q167 615 167 623Q167 626 166 628T162 632T157 634T149 635T141 636T132 637T122 637Q112 637 109 637T101 638T95 641T94 647Q94 649 96 661Q101 680 107 682T179 688Q194 689 213 690T243 693T254 694Q266 694 266 686Q266 675 193 386T118 83Q118 81 118 75T117 65V59Z"></path><path id="MJMATHI-57" stroke-width="1" d="M436 683Q450 683 486 682T553 680Q604 680 638 681T677 682Q695 682 695 674Q695 670 692 659Q687 641 683 639T661 637Q636 636 621 632T600 624T597 615Q597 603 613 377T629 138L631 141Q633 144 637 151T649 170T666 200T690 241T720 295T759 362Q863 546 877 572T892 604Q892 619 873 628T831 637Q817 637 817 647Q817 650 819 660Q823 676 825 679T839 682Q842 682 856 682T895 682T949 681Q1015 681 1034 683Q1048 683 1048 672Q1048 666 1045 655T1038 640T1028 637Q1006 637 988 631T958 617T939 600T927 584L923 578L754 282Q586 -14 585 -15Q579 -22 561 -22Q546 -22 542 -17Q539 -14 523 229T506 480L494 462Q472 425 366 239Q222 -13 220 -15T215 -19Q210 -22 197 -22Q178 -22 176 -15Q176 -12 154 304T131 622Q129 631 121 633T82 637H58Q51 644 51 648Q52 671 64 683H76Q118 680 176 680Q301 680 313 683H323Q329 677 329 674T327 656Q322 641 318 637H297Q236 634 232 620Q262 160 266 136L501 550L499 587Q496 629 489 632Q483 636 447 637Q428 637 422 639T416 648Q416 650 418 660Q419 664 420 
669T421 676T424 680T428 682T436 683Z"></path><path id="MJMATHI-62" stroke-width="1" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path id="MJMATHI-73" stroke-width="1" d="M131 289Q131 321 147 354T203 415T300 442Q362 442 390 415T419 355Q419 323 402 308T364 292Q351 292 340 300T328 326Q328 342 337 354T354 372T367 378Q368 378 368 379Q368 382 361 388T336 399T297 405Q249 405 227 379T204 326Q204 301 223 291T278 274T330 259Q396 230 396 163Q396 135 385 107T352 51T289 7T195 -10Q118 -10 86 19T53 87Q53 126 74 143T118 160Q133 160 146 151T160 120Q160 94 142 76T111 58Q109 57 108 57T107 55Q108 52 115 47T146 34T201 27Q237 27 263 38T301 66T318 97T323 122Q323 150 302 164T254 181T195 196T148 231Q131 256 131 289Z"></path><path id="MJMATHI-6F" stroke-width="1" d="M201 -11Q126 -11 80 38T34 156Q34 221 64 279T146 380Q222 441 301 441Q333 441 341 440Q354 437 367 433T402 417T438 387T464 338T476 268Q476 161 390 75T201 -11ZM121 120Q121 70 147 48T206 26Q250 26 289 58T351 142Q360 163 374 216T388 308Q388 352 370 375Q346 405 306 405Q243 405 195 347Q158 303 140 230T121 120Z"></path><path id="MJMATHI-66" stroke-width="1" d="M118 -162Q120 -162 124 -164T135 -167T147 -168Q160 -168 171 -155T187 -126Q197 -99 221 27T267 267T289 382V385H242Q195 385 192 387Q188 390 188 397L195 425Q197 430 203 430T250 431Q298 431 298 432Q298 434 307 482T319 540Q356 705 465 705Q502 703 526 683T550 630Q550 594 529 578T487 561Q443 561 443 603Q443 622 454 636T478 657L487 662Q471 668 457 668Q445 668 434 658T419 630Q412 601 403 552T387 469T380 433Q380 431 435 431Q480 431 487 430T498 
424Q499 420 496 407T491 391Q489 386 482 386T428 385H372L349 263Q301 15 282 -47Q255 -132 212 -173Q175 -205 139 -205Q107 -205 81 -186T55 -132Q55 -95 76 -78T118 -61Q162 -61 162 -103Q162 -122 151 -136T127 -157L118 -162Z"></path><path id="MJMATHI-74" stroke-width="1" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path id="MJMATHI-6D" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T88 425T132 442T175 435T205 417T221 395T229 376L231 369Q231 367 232 367L243 378Q303 442 384 442Q401 442 415 440T441 433T460 423T475 411T485 398T493 385T497 373T500 364T502 357L510 367Q573 442 659 442Q713 442 746 415T780 336Q780 285 742 178T704 50Q705 36 709 31T724 26Q752 26 776 56T815 138Q818 149 821 151T837 153Q857 153 857 145Q857 144 853 130Q845 101 831 73T785 17T716 -10Q669 -10 648 17T627 73Q627 92 663 193T700 345Q700 404 656 404H651Q565 404 506 303L499 291L466 157Q433 26 428 16Q415 -11 385 -11Q372 -11 364 -4T353 8T350 18Q350 29 384 161L420 307Q423 322 423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 181Q151 335 151 342Q154 357 154 369Q154 405 129 405Q107 405 92 377T69 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-61" stroke-width="1" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 
48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"></path><path id="MJSZ2-2211" stroke-width="1" d="M60 948Q63 950 665 950H1267L1325 815Q1384 677 1388 669H1348L1341 683Q1320 724 1285 761Q1235 809 1174 838T1033 881T882 898T699 902H574H543H251L259 891Q722 258 724 252Q725 250 724 246Q721 243 460 -56L196 -356Q196 -357 407 -357Q459 -357 548 -357T676 -358Q812 -358 896 -353T1063 -332T1204 -283T1307 -196Q1328 -170 1348 -124H1388Q1388 -125 1381 -145T1356 -210T1325 -294L1267 -449L666 -450Q64 -450 61 -448Q55 -446 55 -439Q55 -437 57 -433L590 177Q590 178 557 222T452 366T322 544L56 909L55 924Q55 945 60 948Z"></path><path id="MJMATHI-6A" stroke-width="1" d="M297 596Q297 627 318 644T361 661Q378 661 389 651T403 623Q403 595 384 576T340 557Q322 557 310 567T297 596ZM288 376Q288 405 262 405Q240 405 220 393T185 362T161 325T144 293L137 279Q135 278 121 278H107Q101 284 101 286T105 299Q126 348 164 391T252 441Q253 441 260 441T272 442Q296 441 316 432Q341 418 354 401T367 348V332L318 133Q267 -67 264 -75Q246 -125 194 -164T75 -204Q25 -204 7 -183T-12 -137Q-12 -110 7 -91T53 -71Q70 -71 82 -81T95 -112Q95 -148 63 -167Q69 -168 77 -168Q111 -168 139 -140T182 -74L193 -32Q204 11 219 72T251 197T278 308T289 365Q289 372 288 376Z"></path><path id="MJMAIN-2B" stroke-width="1" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path id="MJMATHI-65" stroke-width="1" d="M39 168Q39 225 58 272T107 350T174 402T244 433T307 442H310Q355 442 388 420T421 355Q421 265 310 237Q261 224 176 223Q139 223 138 221Q138 219 132 186T125 128Q125 81 146 54T209 26T302 45T394 111Q403 121 406 121Q410 121 419 112T429 98T420 82T390 55T344 24T281 -1T205 -11Q126 -11 83 42T39 168ZM373 353Q367 405 305 405Q272 405 244 391T199 357T170 316T154 280T149 
261Q149 260 169 260Q282 260 327 284T373 353Z"></path><path id="MJSZ1-2211" stroke-width="1" d="M61 748Q64 750 489 750H913L954 640Q965 609 976 579T993 533T999 516H979L959 517Q936 579 886 621T777 682Q724 700 655 705T436 710H319Q183 710 183 709Q186 706 348 484T511 259Q517 250 513 244L490 216Q466 188 420 134T330 27L149 -187Q149 -188 362 -188Q388 -188 436 -188T506 -189Q679 -189 778 -162T936 -43Q946 -27 959 6H999L913 -249L489 -250Q65 -250 62 -248Q56 -246 56 -239Q56 -234 118 -161Q186 -81 245 -11L428 206Q428 207 242 462L57 717L56 728Q56 744 61 748Z"></path><path id="MJMATHI-4E" stroke-width="1" d="M234 637Q231 637 226 637Q201 637 196 638T191 649Q191 676 202 682Q204 683 299 683Q376 683 387 683T401 677Q612 181 616 168L670 381Q723 592 723 606Q723 633 659 637Q635 637 635 648Q635 650 637 660Q641 676 643 679T653 683Q656 683 684 682T767 680Q817 680 843 681T873 682Q888 682 888 672Q888 650 880 642Q878 637 858 637Q787 633 769 597L620 7Q618 0 599 0Q585 0 582 2Q579 5 453 305L326 604L261 344Q196 88 196 79Q201 46 268 46H278Q284 41 284 38T282 19Q278 6 272 0H259Q228 2 151 2Q123 2 100 2T63 2T46 1Q31 1 31 10Q31 14 34 26T39 40Q41 46 62 46Q130 49 150 85Q154 91 221 362L289 634Q287 635 234 637Z"></path><path id="MJMATHI-63" stroke-width="1" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path id="MJMATHI-72" stroke-width="1" d="M21 287Q22 290 23 295T28 317T38 348T53 381T73 411T99 433T132 442Q161 442 183 430T214 408T225 388Q227 382 228 382T236 389Q284 441 347 441H350Q398 441 422 400Q430 381 430 363Q430 333 417 315T391 292T366 288Q346 288 334 299T322 328Q322 376 378 392Q356 405 342 405Q286 405 239 331Q229 315 224 298T190 165Q156 25 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 
17Q74 30 114 189T154 366Q154 405 128 405Q107 405 92 377T68 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-6E" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-70" stroke-width="1" d="M23 287Q24 290 25 295T30 317T40 348T55 381T75 411T101 433T134 442Q209 442 230 378L240 387Q302 442 358 442Q423 442 460 395T497 281Q497 173 421 82T249 -10Q227 -10 210 -4Q199 1 187 11T168 28L161 36Q160 35 139 -51T118 -138Q118 -144 126 -145T163 -148H188Q194 -155 194 -157T191 -175Q188 -187 185 -190T172 -194Q170 -194 161 -194T127 -193T65 -192Q-5 -192 -24 -194H-32Q-39 -187 -39 -183Q-37 -156 -26 -148H-6Q28 -147 33 -136Q36 -130 94 103T155 350Q156 355 156 364Q156 405 131 405Q109 405 94 377T71 316T59 280Q57 278 43 278H29Q23 284 23 287ZM178 102Q200 26 252 26Q282 26 310 49T356 107Q374 141 392 215T411 325V331Q411 405 350 405Q339 405 328 402T306 393T286 380T269 365T254 350T243 336T235 326L232 322Q232 321 229 308T218 264T204 212Q178 106 178 102Z"></path><path id="MJMAIN-2212" stroke-width="1" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path><path id="MJMATHI-67" stroke-width="1" d="M311 43Q296 30 267 15T206 0Q143 0 105 45T66 160Q66 265 143 353T314 442Q361 442 401 394L404 398Q406 401 409 404T418 412T431 419T447 422Q461 422 470 413T480 394Q480 379 423 152T363 -80Q345 -134 286 -169T151 -205Q10 -205 10 -137Q10 -111 28 -91T74 -71Q89 -71 102 -80T116 -111Q116 -121 114 -130T107 -144T99 -154T92 -162L90 -164H91Q101 
-167 151 -167Q189 -167 211 -155Q234 -144 254 -122T282 -75Q288 -56 298 -13Q311 35 311 43ZM384 328L380 339Q377 350 375 354T369 368T359 382T346 393T328 402T306 405Q262 405 221 352Q191 313 171 233T151 117Q151 38 213 38Q269 38 323 108L331 118L384 328Z"></path><path id="MJMATHI-48" stroke-width="1" d="M228 637Q194 637 192 641Q191 643 191 649Q191 673 202 682Q204 683 219 683Q260 681 355 681Q389 681 418 681T463 682T483 682Q499 682 499 672Q499 670 497 658Q492 641 487 638H485Q483 638 480 638T473 638T464 637T455 637Q416 636 405 634T387 623Q384 619 355 500Q348 474 340 442T328 395L324 380Q324 378 469 378H614L615 381Q615 384 646 504Q674 619 674 627T617 637Q594 637 587 639T580 648Q580 650 582 660Q586 677 588 679T604 682Q609 682 646 681T740 680Q802 680 835 681T871 682Q888 682 888 672Q888 645 876 638H874Q872 638 869 638T862 638T853 637T844 637Q805 636 794 634T776 623Q773 618 704 340T634 58Q634 51 638 51Q646 48 692 46H723Q729 38 729 37T726 19Q722 6 716 0H701Q664 2 567 2Q533 2 504 2T458 2T437 1Q420 1 420 10Q420 15 423 24Q428 43 433 45Q437 46 448 46H454Q481 46 514 49Q520 50 522 50T528 55T534 64T540 82T547 110T558 153Q565 181 569 198Q602 330 602 331T457 332H312L279 197Q245 63 245 58Q245 51 253 49T303 46H334Q340 38 340 37T337 19Q333 6 327 0H312Q275 2 178 2Q144 2 115 2T69 2T48 1Q31 1 31 10Q31 12 34 24Q39 43 44 45Q48 46 59 46H65Q92 46 125 49Q139 52 144 61Q147 65 216 339T285 628Q285 635 228 637Z"></path><path id="MJMATHI-3D5" stroke-width="1" d="M409 688Q413 694 421 694H429H442Q448 688 448 686Q448 679 418 563Q411 535 404 504T392 458L388 442Q388 441 397 441T429 435T477 418Q521 397 550 357T579 260T548 151T471 65T374 11T279 -10H275L251 -105Q245 -128 238 -160Q230 -192 227 -198T215 -205H209Q189 -205 189 -198Q189 -193 211 -103L234 -11Q234 -10 226 -10Q221 -10 206 -8T161 6T107 36T62 89T43 171Q43 231 76 284T157 370T254 422T342 441Q347 441 348 445L378 567Q409 686 409 688ZM122 150Q122 116 134 91T167 53T203 35T237 27H244L337 404Q333 404 326 403T297 395T255 379T211 350T170 304Q152 276 137 237Q122 191 
122 150ZM500 282Q500 320 484 347T444 385T405 400T381 404H378L332 217L284 29Q284 27 285 27Q293 27 317 33T357 47Q400 66 431 100T475 170T494 234T500 282Z"></path><path id="MJMAIN-35" stroke-width="1" d="M164 157Q164 133 148 117T109 101H102Q148 22 224 22Q294 22 326 82Q345 115 345 210Q345 313 318 349Q292 382 260 382H254Q176 382 136 314Q132 307 129 306T114 304Q97 304 95 310Q93 314 93 485V614Q93 664 98 664Q100 666 102 666Q103 666 123 658T178 642T253 634Q324 634 389 662Q397 666 402 666Q410 666 410 648V635Q328 538 205 538Q174 538 149 544L139 546V374Q158 388 169 396T205 412T256 420Q337 420 393 355T449 201Q449 109 385 44T229 -22Q148 -22 99 32T50 154Q50 178 61 192T84 210T107 214Q132 214 148 197T164 157Z"></path><path id="MJMATHI-44" stroke-width="1" d="M287 628Q287 635 230 637Q207 637 200 638T193 647Q193 655 197 667T204 682Q206 683 403 683Q570 682 590 682T630 676Q702 659 752 597T803 431Q803 275 696 151T444 3L430 1L236 0H125H72Q48 0 41 2T33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM703 469Q703 507 692 537T666 584T629 613T590 629T555 636Q553 636 541 636T512 636T479 637H436Q392 637 386 627Q384 623 313 339T242 52Q242 48 253 48T330 47Q335 47 349 47T373 46Q499 46 581 128Q617 164 640 212T683 339T703 469Z"></path><path id="MJMATHI-4B" stroke-width="1" d="M285 628Q285 635 228 637Q205 637 198 638T191 647Q191 649 193 661Q199 681 203 682Q205 683 214 683H219Q260 681 355 681Q389 681 418 681T463 682T483 682Q500 682 500 674Q500 669 497 660Q496 658 496 654T495 648T493 644T490 641T486 639T479 638T470 637T456 637Q416 636 405 634T387 623L306 305Q307 305 490 449T678 597Q692 611 692 620Q692 635 667 637Q651 637 651 648Q651 650 654 662T659 677Q662 682 676 682Q680 682 711 681T791 680Q814 680 839 681T869 682Q889 682 889 672Q889 650 881 642Q878 637 862 637Q787 632 726 586Q710 576 656 534T556 455L509 418L518 396Q527 374 546 329T581 244Q656 67 661 61Q663 59 666 57Q680 47 717 46H738Q744 38 744 37T741 19Q737 6 731 0H720Q680 3 625 3Q503 3 488 0H478Q472 6 472 9T474 
27Q478 40 480 43T491 46H494Q544 46 544 71Q544 75 517 141T485 216L427 354L359 301L291 248L268 155Q245 63 245 58Q245 51 253 49T303 46H334Q340 37 340 35Q340 19 333 5Q328 0 317 0Q314 0 280 1T180 2Q118 2 85 2T49 1Q31 1 31 11Q31 13 34 25Q38 41 42 43T65 46Q92 46 125 49Q139 52 144 61Q147 65 216 339T285 628Z"></path><path id="MJMATHI-46" stroke-width="1" d="M48 1Q31 1 31 11Q31 13 34 25Q38 41 42 43T65 46Q92 46 125 49Q139 52 144 61Q146 66 215 342T285 622Q285 629 281 629Q273 632 228 634H197Q191 640 191 642T193 659Q197 676 203 680H742Q749 676 749 669Q749 664 736 557T722 447Q720 440 702 440H690Q683 445 683 453Q683 454 686 477T689 530Q689 560 682 579T663 610T626 626T575 633T503 634H480Q398 633 393 631Q388 629 386 623Q385 622 352 492L320 363H375Q378 363 398 363T426 364T448 367T472 374T489 386Q502 398 511 419T524 457T529 475Q532 480 548 480H560Q567 475 567 470Q567 467 536 339T502 207Q500 200 482 200H470Q463 206 463 212Q463 215 468 234T473 274Q473 303 453 310T364 317H309L277 190Q245 66 245 60Q245 46 334 46H359Q365 40 365 39T363 19Q359 6 353 0H336Q295 2 185 2Q120 2 86 2T48 1Z"></path><path id="MJMATHI-53" stroke-width="1" d="M308 24Q367 24 416 76T466 197Q466 260 414 284Q308 311 278 321T236 341Q176 383 176 462Q176 523 208 573T273 648Q302 673 343 688T407 704H418H425Q521 704 564 640Q565 640 577 653T603 682T623 704Q624 704 627 704T632 705Q645 705 645 698T617 577T585 459T569 456Q549 456 549 465Q549 471 550 475Q550 478 551 494T553 520Q553 554 544 579T526 616T501 641Q465 662 419 662Q362 662 313 616T263 510Q263 480 278 458T319 427Q323 425 389 408T456 390Q490 379 522 342T554 242Q554 216 546 186Q541 164 528 137T492 78T426 18T332 -20Q320 -22 298 -22Q199 -22 144 33L134 44L106 13Q83 -14 78 -18T65 -22Q52 -22 52 -14Q52 -11 110 221Q112 227 130 227H143Q149 221 149 216Q149 214 148 207T144 186T142 153Q144 114 160 87T203 47T255 29T308 24Z"></path><path id="MJMATHI-50" stroke-width="1" d="M287 628Q287 635 230 637Q206 637 199 638T192 648Q192 649 194 659Q200 679 203 681T397 683Q587 682 600 680Q664 669 707 
631T751 530Q751 453 685 389Q616 321 507 303Q500 302 402 301H307L277 182Q247 66 247 59Q247 55 248 54T255 50T272 48T305 46H336Q342 37 342 35Q342 19 335 5Q330 0 319 0Q316 0 282 1T182 2Q120 2 87 2T51 1Q33 1 33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM645 554Q645 567 643 575T634 597T609 619T560 635Q553 636 480 637Q463 637 445 637T416 636T404 636Q391 635 386 627Q384 621 367 550T332 412T314 344Q314 342 395 342H407H430Q542 342 590 392Q617 419 631 471T645 554Z"></path><path id="MJMATHI-64" stroke-width="1" d="M366 683Q367 683 438 688T511 694Q523 694 523 686Q523 679 450 384T375 83T374 68Q374 26 402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487H491Q506 153 506 145Q506 140 503 129Q490 79 473 48T445 8T417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157Q33 205 53 255T101 341Q148 398 195 420T280 442Q336 442 364 400Q369 394 369 396Q370 400 396 505T424 616Q424 629 417 632T378 637H357Q351 643 351 645T353 664Q358 683 366 683ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path id="MJMATHI-68" stroke-width="1" d="M137 683Q138 683 209 688T282 694Q294 694 294 685Q294 674 258 534Q220 386 220 383Q220 381 227 388Q288 442 357 442Q411 442 444 415T478 336Q478 285 440 178T402 50Q403 36 407 31T422 26Q450 26 474 56T513 138Q516 149 519 151T535 153Q555 153 555 145Q555 144 551 130Q535 71 500 33Q466 -10 419 -10H414Q367 -10 346 17T325 74Q325 90 361 192T398 345Q398 404 354 404H349Q266 404 205 306L198 293L164 158Q132 28 127 16Q114 -11 83 -11Q69 -11 59 -2T48 16Q48 30 121 320L195 616Q195 629 188 632T149 637H128Q122 643 122 645T124 664Q129 683 137 683Z"></path></defs></svg></div><div id="wmd-preview" class="wmd-preview wmd-preview-full-reader"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="b3j7" id="识别数字">识别数字</h1><div 
class="md-section-divider"></div><h2 data-anchor-id="h4bg" id="背景介绍">背景介绍</h2><p data-anchor-id="ifc9">当我们学习编程的时候,编写的第一个程序一般是实现打印"Hello World"。而机器学习(或深度学习)的入门教程,一般都是 <a href="http://yann.lecun.com/exdb/mnist/" target="_blank">MNIST</a> 数据库上的手写识别问题。原因是手写识别属于典型的图像分类问题,比较简单,同时MNIST数据集也很完备。MNIST数据集作为一个简单的计算机视觉数据集,包含一系列如图1所示的手写数字图片和对应的标签。图片是28x28的像素矩阵,标签则对应着0~9的10个数字。每张图片都经过了大小归一化和居中处理。</p><p align="center" data-anchor-id="4p0g">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/mnist_example_image.png" alt="MNIST手写数字图片示例" width="400"><br>
图1. MNIST图片示例
</p><p data-anchor-id="zo4d">MNIST数据集是从 <a href="https://www.nist.gov/srd/nist-special-database-19" target="_blank">NIST</a> 的Special Database 3(SD-3)和Special Database 1(SD-1)构建而来。由于SD-3是由美国人口调查局的员工进行标注,SD-1是由美国高中生进行标注,因此SD-3比SD-1更干净也更容易识别。Yann LeCun等人从SD-1和SD-3中各取一半作为MNIST的训练集(60000条数据)和测试集(10000条数据),其中训练集来自250位不同的标注员,此外还保证了训练集和测试集的标注员是不完全相同的。</p><p data-anchor-id="rane">Yann LeCun早先在手写字符识别上做了很多研究,并在研究过程中提出了卷积神经网络(Convolutional Neural Network),大幅度地提高了手写字符的识别能力,也因此成为了深度学习领域的奠基人之一。如今的深度学习领域,卷积神经网络占据了至关重要的地位,从最早Yann LeCun提出的简单LeNet,到如今ImageNet大赛上的优胜模型VGGNet、GoogLeNet、ResNet等(请参见<a href="https://github.com/PaddlePaddle/book/tree/develop/image_classification" target="_blank">图像分类</a> 教程),人们在图像分类领域,利用卷积神经网络得到了一系列惊人的结果。</p><p data-anchor-id="mncw">有很多算法在MNIST上进行实验。1998年,LeCun分别用单层线性分类器、多层感知器(Multilayer Perceptron, MLP)和多层卷积神经网络LeNet进行实验,使得测试集上的误差不断下降(从12%下降到0.7%)[<a href="#参考文献">1</a>]。此后,科学家们又基于K近邻(K-Nearest Neighbors)算法[<a href="#参考文献">2</a>]、支持向量机(SVM)[<a href="#参考文献">3</a>]、神经网络[<a href="#参考文献">4-7</a>]和Boosting方法[<a href="#参考文献">8</a>]等做了大量实验,并采用多种预处理方法(如去除歪曲、去噪、模糊等)来提高识别的准确率。</p><p data-anchor-id="ic7s">本教程中,我们从简单的模型Softmax回归开始,带大家入门手写字符识别,并逐步进行模型优化。</p><div class="md-section-divider"></div><h2 data-anchor-id="515e" id="模型概览">模型概览</h2><p data-anchor-id="uibf">基于MNIST数据训练一个分类器,在介绍本教程使用的三个基本图像分类网络前,我们先给出一些定义: <br>
- <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-1-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-1">X</script>是输入:MNIST图片是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-2-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 3224.9444444444443 730.103370696049" style="width: 7.529ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-38" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-D7" x="1223" y="0"></use><g transform="translate(2223,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-38" x="500" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-2">28\times28</script> 的二维图像,为了进行计算,我们将其转化为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-3-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -698.0516853480245 1501.5 741.103370696049" style="width: 3.475ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 
0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-37"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-38" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-34" x="1001" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-3">784</script>维向量,即<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-4-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 9427.256942141557 1042.103370696049" style="width: 21.892ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1130" y="0"></use><g transform="translate(2186,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28"></use><g transform="translate(389,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1415" y="0"></use><g transform="translate(1861,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2887" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="3332" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-2C" x="4671" y="0"></use><g transform="translate(5116,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-37"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-38" x="500" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="1001" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="6851" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-4">X=\left ( x_0, x_1, \dots, x_{783} \right )</script><br>
- <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-5-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 763.5 724.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use></g></svg></span><script type="math/tex" id="MathJax-Element-5">Y</script>是输出:分类器的输出是10类数字(0-9),即<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-6-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 8384.443054173824 1042.103370696049" style="width: 19.459ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1041" y="0"></use><g transform="translate(2097,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28"></use><g transform="translate(389,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1333" y="0"></use><g transform="translate(1779,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-2C" x="2723" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="3168" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4507" y="0"></use><g transform="translate(4952,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-39" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5897" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-6">Y=\left ( y_0, y_1, \dots, y_9 \right )</script>,每一维<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-7-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 834.8053928999522 690.103370696049" style="width: 1.969ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-7">y_i</script>代表图片分类为第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-8-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script 
type="math/tex" id="MathJax-Element-8">i</script>类数字的概率。 <br>
- <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-9-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 681.5 727.103370696049" style="width: 1.622ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4C"></use></g></svg></span><script type="math/tex" id="MathJax-Element-9">L</script>是图片的真实标签:<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-10-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 7726.443054173823 1042.103370696049" style="width: 17.954ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="959" y="0"></use><g transform="translate(2015,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28"></use><g transform="translate(389,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="422" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1141" y="0"></use><g transform="translate(1587,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="422" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-2C" x="2339" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="2784" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4123" y="0"></use><g transform="translate(4568,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-39" x="422" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5321" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-10">L=\left ( l_0, l_1, \dots, l_9 \right )</script>也是10维,但只有一维为1,其他都为0。</p><div class="md-section-divider"></div><h3 data-anchor-id="hytt" id="softmax回归softmax-regression">Softmax回归(Softmax Regression)</h3><p data-anchor-id="d36a">最简单的Softmax回归模型是先将输入层经过一个全连接层得到特征,然后直接通过softmax函数进行多分类[<a href="#参考文献">9</a>]。</p><p data-anchor-id="28f3">输入层的数据<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-11-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-11">X</script>传到输出层,在激活操作之前,会乘以相应的权重 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-12-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1048.5 747.103370696049" style="width: 2.432ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" 
fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use></g></svg></span><script type="math/tex" id="MathJax-Element-12">W</script> ,并加上偏置变量 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-13-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 429.5 747.103370696049" style="width: 1.042ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use></g></svg></span><script type="math/tex" id="MathJax-Element-13">b</script> ,具体如下:</p><div class="md-section-divider"></div><p data-anchor-id="rtf3"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-14-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -971.0516853480245 13144.875178405879 2321.8309499847855" style="width: 30.579ex; height: 5.444ex; vertical-align: -3.243ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1112" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73" x="2168" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="2638" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="3123" 
y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="3674" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="4035" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="4914" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="5443" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="6016" y="0"></use><g transform="translate(6405,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="815" y="-1536"></use></g><g transform="translate(8017,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="624" y="0"></use></g></g><g transform="translate(9794,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="10980" y="0"></use><g transform="translate(11981,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="12755" y="0"></use></g></svg></span></div><script 
type="math/tex; mode=display" id="MathJax-Element-14"> y_i = softmax(\sum_j W_{i,j}x_j + b_i) </script></p><p data-anchor-id="x53l">其中 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-15-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -910.1487101814612 9371.122599106595 1697.582860294907" style="width: 21.776ex; height: 3.938ex; vertical-align: -1.969ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="469" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="955" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1505" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="1867" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="2745" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="3275" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="3847" y="0"></use><g transform="translate(4237,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5153" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="5821" y="0"></use><g transform="translate(6997,0)"><rect stroke="none" width="2253" height="60" x="0" y="220"></rect><g transform="translate(672,411)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-65"></use><g transform="translate(329,256)"><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="572" y="-318"></use></g></g><g transform="translate(60,-463)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ1-2211"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="1494" y="-405"></use><g transform="translate(1190,0)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(329,337)"><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="572" y="-318"></use></g></g></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-15"> softmax(x_i) = \frac{e^{x_i}}{\sum_j e^{x_j}} </script></p><p data-anchor-id="uwn6">对于有 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-16-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 888.5 725.103370696049" style="width: 2.085ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-16">N</script> 个类别的多分类问题,指定 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-17-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: 
inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 888.5 725.103370696049" style="width: 2.085ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-17">N</script> 个输出节点,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-18-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 888.5 725.103370696049" style="width: 2.085ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-18">N</script> 维输入特征经过softmax将归一化为 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-19-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 888.5 725.103370696049" style="width: 2.085ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-19">N</script> 个[0,1]范围内的实数值,分别表示该样本属于这 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-20-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 888.5 725.103370696049" style="width: 2.085ex; height: 
1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-20">N</script> 个类别的概率。此处的 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-21-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 834.8053928999522 690.103370696049" style="width: 1.969ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-21">y_i</script> 即对应该图片为数字 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-22-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-22">i</script> 的预测概率。</p><p data-anchor-id="jvku">在分类问题中,我们一般采用交叉熵代价损失函数(cross entropy),公式如下:</p><div class="md-section-divider"></div><p data-anchor-id="89ov"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-23-Frame" 
style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -971.0516853480245 18555.666341355456 2185.359341215782" style="width: 43.089ex; height: 5.097ex; vertical-align: -2.896ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="433" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="885" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73" x="1370" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73" x="1840" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="2309" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="2776" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="3376" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="3738" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="4189" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-70" x="4675" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79" x="5178" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="5676" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="6065" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="6364" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="6893" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="7323" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="7789" 
y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="8088" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79" x="8533" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="9030" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="9697" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="10754" y="0"></use><g transform="translate(11699,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="848" y="-1536"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="13310" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="13609" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="14138" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="14568" y="0"></use><g transform="translate(15034,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="422" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="15677" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="15975" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67" x="16461" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="16941" y="0"></use><g transform="translate(17331,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="693" y="-213"></use></g><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="18166" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-23"> crossentropy(label, y) = -\sum_i label_ilog(y_i) </script></p><p data-anchor-id="8adp">图2为softmax回归的网络图,图中权重用黑线表示、偏置用红线表示、+1代表偏置参数的系数为1。</p><p align="center" data-anchor-id="ed2j">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/softmax_regression.png" alt="softmax回归网络结构图" width="400"><br>
图2. softmax回归网络结构图<br>
</p><div class="md-section-divider"></div><h3 data-anchor-id="18v5" id="多层感知器multilayer-perceptron-mlp">多层感知器(Multilayer Perceptron, MLP)</h3><p data-anchor-id="h6cx">Softmax回归模型采用了最简单的两层神经网络,即只有输入层和输出层,因此其拟合能力有限。为了达到更好的识别效果,我们考虑在输入层和输出层中间加上若干个隐藏层[<a href="#参考文献">10</a>]。</p><ol data-anchor-id="vz8a">
<li>经过第一个隐藏层,可以得到 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-24-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 8352.220831951603 1042.103370696049" style="width: 19.344ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-48"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1175" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1563" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3D5" x="2619" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="3215" y="0"></use><g transform="translate(3605,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1335" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58" x="5003" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="6078" y="0"></use><g transform="translate(7079,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="7962" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-24"> H_1 = \phi(W_1X + b_1) </script>,其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-25-Frame" role="textbox" aria-readonly="true" style="font-size: 
100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 596.5 941.103370696049" style="width: 1.39ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3D5"></use></g></svg></span><script type="math/tex" id="MathJax-Element-25">\phi</script>代表激活函数,常见的有sigmoid、tanh或ReLU等函数。</li>
<li>经过第二个隐藏层,可以得到 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-26-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 8785.127775935469 1042.103370696049" style="width: 20.386ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-48"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1175" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1563" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3D5" x="2619" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="3215" y="0"></use><g transform="translate(3605,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1335" y="-213"></use></g><g transform="translate(5003,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-48"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1175" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="6511" y="0"></use><g transform="translate(7512,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="8395" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-26"> H_2 = \phi(W_2H_1 + b_2) 
</script></li>
<li>最后,再经过输出层,得到的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-27-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 11514.220831951603 1042.103370696049" style="width: 26.757ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-59"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1041" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73" x="2097" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="2567" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="3052" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="3603" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="3964" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="4843" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="5372" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="5945" y="0"></use><g transform="translate(6334,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="1335" y="-213"></use></g><g transform="translate(7732,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-48"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1175" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="9240" y="0"></use><g 
transform="translate(10241,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="11124" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-27">Y=softmax(W_3H_2 + b_3)</script>,即为最后的分类结果向量。</li>
</ol><p data-anchor-id="kw2r">图3为多层感知器的网络结构图,图中权重用黑线表示、偏置用红线表示、+1代表偏置参数的系数为1。</p><p align="center" data-anchor-id="ejcm">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/mlp.png" width="500"><br>
图3. 多层感知器网络结构图<br>
</p><div class="md-section-divider"></div><h3 data-anchor-id="c87o" id="卷积神经网络convolutional-neural-network-cnn">卷积神经网络(Convolutional Neural Network, CNN)</h3><div class="md-section-divider"></div><h4 data-anchor-id="dop9" id="卷积层">卷积层</h4><p align="center" data-anchor-id="oi4v">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/conv_layer.png" width="500"><br>
图4. 卷积层图片<br>
</p><p data-anchor-id="5x4p">卷积层是卷积神经网络的核心基石。该层的参数由一组可学习的过滤器(也叫作卷积核)组成。在前向过程中,每个卷积核在输入层进行横向和纵向的扫描,与输入层对应扫描位置进行卷积,得到的结果加上偏置并用相应的激活函数进行激活,结果能够得到一个二维的激活图(activation map)。每个特定的卷积核都能得到特定的激活图(activation map),如有的卷积核可能对识别边角,有的可能识别圆圈,那这些卷积核可能对于对应的特征响应要强。</p><p data-anchor-id="yxad">图4是卷积层的一个动态图。由于3D量难以表示,所有的3D量(输入的3D量(蓝色),权重3D量(红色),输出3D量(绿色))通过将深度在行上堆叠来表示。如图4,输入层是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-28-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 10360.2208319516 920.103370696049" style="width: 24.093ex; height: 2.085ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1335" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1676" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-35" x="2732" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3232" y="0"></use><g transform="translate(3678,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-48"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1175" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="5241" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-35" x="6297" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6798" y="0"></use><g transform="translate(7243,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-44"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1171" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="8803" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="9859" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-28">W_1=5,H_1=5,D_1=3</script>,我们常见的彩色图片其实就是类似这样的输入层,彩色图片的宽和高对应这里的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-29-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1398.406943983867 874.8104774772355" style="width: 3.243ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-29">W_1</script><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-30-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1285.406943983867 874.8104774772355" style="width: 3.012ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-48"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1175" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-30">H_1</script>,而彩色图片有RGB三个颜色通道,对应这里的<span class="MathJax_Preview"></span><span class="MathJax_SVG" 
id="MathJax-Element-31-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1282.406943983867 874.8104774772355" style="width: 3.012ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-44"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1171" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-31">D_1</script>;卷积层的参数为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-32-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 11709.72222222222 942.103370696049" style="width: 27.22ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4B"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1167" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="2223" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2724" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-46" x="3169" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="4196" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="5252" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="5753" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-53" x="6198" y="0"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="7121" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="8177" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="8678" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50" x="9123" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="10152" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="11209" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-32">K=2,F=3,S=2,P=1</script>,这里的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-33-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 889.5 725.103370696049" style="width: 2.085ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4B"></use></g></svg></span><script type="math/tex" id="MathJax-Element-33">K</script>是卷积核的数量,如图4中有<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-34-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -716.0516853480245 4071.4069439838668 902.3668266633396" style="width: 9.498ex; height: 2.085ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-46"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="749" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="1095" y="0"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1393" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="1755" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="2221" y="0"></use><g transform="translate(2673,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="1335" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-34">Filter W_0</script><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-35-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -716.0516853480245 4071.4069439838668 886.8104774772355" style="width: 9.498ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-46"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="749" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="1095" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1393" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="1755" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="2221" y="0"></use><g transform="translate(2673,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1335" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-35">Filter W_1</script>两个卷积核,<span class="MathJax_Preview"></span><span 
class="MathJax_SVG" id="MathJax-Element-36-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -701.0516853480245 749.5 722.103370696049" style="width: 1.737ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-46"></use></g></svg></span><script type="math/tex" id="MathJax-Element-36">F</script>对应卷积核的大小,图中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-37-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1549 747.103370696049" style="width: 3.591ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="1048" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-37">W0</script><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-38-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1549 747.103370696049" style="width: 3.591ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1048" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-38">W1</script>在每一层深度上都是<span class="MathJax_Preview"></span><span 
class="MathJax_SVG" id="MathJax-Element-39-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -686.0516853480245 2223.9444444444443 729.103370696049" style="width: 5.212ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-D7" x="722" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="1723" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-39">3\times3</script>的矩阵,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-40-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 645.5 769.103370696049" style="width: 1.506ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-53"></use></g></svg></span><script type="math/tex" id="MathJax-Element-40">S</script>对应卷积核扫描的步长,从动态图中可以看到,方框每次左移或下移2个单位,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-41-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 751.5 725.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use></g></svg></span><script type="math/tex" 
id="MathJax-Element-41">P</script>对应Padding扩展,是对输入层的扩展,图中输入层,原始数据为蓝色部分,可以看到灰色部分是进行了大小为1的扩展,用0来进行扩展;图4的动态可视化对输出层结果(绿色)进行迭代,显示每个输出元素是通过将突出显示的输入(蓝色)与滤波器(红色)进行元素相乘,将其相加,然后再加上偏置计算得到的。</p><div class="md-section-divider"></div><h4 data-anchor-id="yh1l" id="池化层">池化层</h4><p align="center" data-anchor-id="lkxj">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/max_pooling.png" width="400"><br>
图5. 池化层图片<br>
</p><p data-anchor-id="nv20">池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。池化包括最大池化、平均池化等。其中最大池化是用不重叠的矩形框将输入层分成不同的区域,对于每个矩形框的数取最大值作为输出层,如图5所示。</p><div class="md-section-divider"></div><h4 data-anchor-id="8rsa" id="lenet-5网络">LeNet-5网络</h4><p align="center" data-anchor-id="dvhs">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/cnn.png"><br>
图6. LeNet-5卷积神经网络结构<br>
</p><p data-anchor-id="fco1"><a href="http://yann.lecun.com/exdb/lenet/" target="_blank">LeNet-5</a>是一个最简单的卷积神经网络。图6显示了其结构:输入的二维图像,先经过两次卷积层到池化层,再经过全连接层,最后使用softmax分类作为输出层。卷积的如下三个特性,决定了LeNet-5能比同样使用全连接层的多层感知器更好地识别图像:</p><ul data-anchor-id="kwpl">
<li>神经元的三维特性: 卷积层的神经元在宽度、高度和深度上进行了组织排列。每一层的神经元仅仅与前一层的一块小区域连接,这块小区域被称为感受野(receptive field)。</li>
<li>局部连接:CNN通过在相邻层的神经元之间实施局部连接模式来利用空间局部相关性。这样的结构保证了学习后的过滤器能够对于局部的输入特征有最强的响应。堆叠许多这样的层导致非线性“过滤器”变得越来越“全局”。这允许网络首先创建输入的小部分的良好表示,然后从它们组合较大区域的表示。</li>
<li>共享权重:在CNN中,每个滤波器在整个视野中重复扫描。 这些复制单元共享相同的参数化(权重向量和偏差)并形成特征图。 这意味着给定卷积层中的所有神经元检测完全相同的特征。 这种复制单元的方式使得无论特征出现在视野中的哪个位置都能被检测到,从而具备平移不变性。</li>
</ul><p data-anchor-id="klh6">更详细的关于卷积神经网络的具体知识可以参考<a href="http://cs231n.github.io/convolutional-networks/" target="_blank">斯坦福大学公开课</a>和<a href="https://github.com/PaddlePaddle/book/blob/develop/image_classification/README.md" target="_blank">图像分类</a>教程。</p><div class="md-section-divider"></div><h3 data-anchor-id="kv0p" id="常见激活函数介绍">常见激活函数介绍</h3><ul data-anchor-id="wln2">
<li><p>sigmoid激活函数: <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-42-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -895.9073646014878 11790.576675790886 1349.7974852602724" style="width: 27.336ex; height: 3.127ex; vertical-align: -1.158ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="550" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="940" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1512" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="2179" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73" x="3236" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="3705" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67" x="4051" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="4531" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="5410" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="5895" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="6241" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="6764" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="7154" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="7726" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="8393" y="0"></use><g transform="translate(9570,0)"><rect 
stroke="none" width="2100" height="60" x="0" y="220"></rect><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1235" y="571"></use><g transform="translate(59,-376)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="500" y="0"></use><g transform="translate(904,0)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(329,204)"><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="778" y="0"></use></g></g></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-42"> f(x) = sigmoid(x) = \frac{1}{1+e^{-x}} </script></p></li>
<li><p>tanh激活函数: <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-43-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -910.1487101814612 10662.995723349197 1333.7312583679693" style="width: 24.788ex; height: 3.127ex; vertical-align: -1.042ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="550" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="940" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1512" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="2179" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="3236" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="3597" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="4127" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="4727" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="5304" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="5693" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="6266" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="6933" y="0"></use><g transform="translate(8109,0)"><rect stroke="none" width="2433" height="60" x="0" y="220"></rect><g transform="translate(60,411)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><use transform="scale(0.5000000000000001)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="659" y="513"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="971" y="0"></use><g transform="translate(1237,0)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(329,256)"><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="778" y="0"></use></g></g></g><g transform="translate(60,-345)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="659" y="408"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="971" y="0"></use><g transform="translate(1237,0)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(329,204)"><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="778" y="0"></use></g></g></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-43"> f(x) = tanh(x) = \frac{e^x-e^{-x}}{e^x+e^{-x}} </script></p>
<p>实际上,tanh函数只是经过缩放和平移的sigmoid函数:先将自变量放大2倍,再将sigmoid函数值放大2倍,最后向下平移1个单位,即tanh(x) = 2sigmoid(2x) - 1。</p></li>
<li><p>ReLU激活函数: <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-44-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 7513.722222222223 1042.103370696049" style="width: 17.49ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="550" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="940" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1512" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="2179" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="3236" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="4114" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="4644" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="5216" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="5606" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6106" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="6551" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="7124" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-44"> f(x) = max(0, x) </script></p></li>
</ul><p data-anchor-id="tss0">更详细的介绍请参考<a href="https://en.wikipedia.org/wiki/Activation_function" target="_blank">维基百科激活函数</a></p><div class="md-section-divider"></div><h2 data-anchor-id="xbcu" id="数据准备">数据准备</h2><div class="md-section-divider"></div><h3 data-anchor-id="c26s" id="数据介绍与下载">数据介绍与下载</h3><p data-anchor-id="pim6">执行以下命令,下载<a href="http://yann.lecun.com/exdb/mnist/" target="_blank">MNIST</a>数据库并解压缩,然后将训练集和测试集的地址分别写入train.list和test.list两个文件,供PaddlePaddle读取。</p><pre data-anchor-id="hdhx"><code>./data/get_mnist_data.sh
</code></pre><p data-anchor-id="fbdq">将下载下来的数据进行 <code>gzip</code> 解压,可以在文件夹 <code>data/raw_data</code> 中找到以下文件:</p><table data-anchor-id="tpwt" class="table table-striped-white table-bordered">
<thead>
<tr>
<th>文件名称</th>
<th>说明</th>
</tr>
</thead>
<tbody><tr>
<td>train-images-idx3-ubyte</td>
<td>训练数据图片,60,000条数据</td>
</tr>
<tr>
<td>train-labels-idx1-ubyte</td>
<td>训练数据标签,60,000条数据</td>
</tr>
<tr>
<td>t10k-images-idx3-ubyte</td>
<td>测试数据图片,10,000条数据</td>
</tr>
<tr>
<td>t10k-labels-idx1-ubyte</td>
<td>测试数据标签,10,000条数据</td>
</tr>
</tbody></table><p data-anchor-id="mste">用户可以通过以下脚本随机绘制10张图片(可参考图1):</p><pre data-anchor-id="27s8"><code>./load_data.py
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="ukke" id="提供数据给paddlepaddle">提供数据给PaddlePaddle</h3><p data-anchor-id="sdul">我们使用python接口传递数据给系统,下面 <code>mnist_provider.py</code>针对MNIST数据给出了完整示例。</p><pre data-anchor-id="x3bc"><code># Define a py data provider
@provider(
input_types={'pixel': dense_vector(28 * 28),
'label': integer_value(10)})
def process(settings, filename): # settings is not used currently.
# 打开图片文件
with open( filename + "-images-idx3-ubyte", "rb") as f:
# 读取开头的四个参数,magic代表数据的格式,n代表数据的总量,rows和cols分别代表行数和列数
magic, n, rows, cols = struct.unpack("&gt;IIII", f.read(16))
# 以无符号字节为单位一个一个的读取数据
images = np.fromfile(
f, 'ubyte',
count=n * rows * cols).reshape(n, rows, cols).astype('float32')
# 将0~255的数据归一化到[-1,1]的区间
images = images / 255.0 * 2.0 - 1.0
# 打开标签文件
with open( filename + "-labels-idx1-ubyte", "rb") as l:
# 读取开头的两个参数
magic, n = struct.unpack("&gt;II", l.read(8))
# 以无符号字节为单位一个一个的读取数据
labels = np.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield {"pixel": images[i, :], 'label': labels[i]}
</code></pre><div class="md-section-divider"></div><h2 data-anchor-id="oilx" id="模型配置说明">模型配置说明</h2><div class="md-section-divider"></div><h3 data-anchor-id="2lby" id="数据定义">数据定义</h3><p data-anchor-id="o0hn">在模型配置中,定义通过 <code>define_py_data_sources2</code> 函数从 <code>dataprovider</code> 中读入数据。如果该配置用于预测,则不需要数据定义部分。</p><pre data-anchor-id="tzmf"><code>if not is_predict:
data_dir = './data/'
define_py_data_sources2(
train_list=data_dir + 'train.list',
test_list=data_dir + 'test.list',
module='mnist_provider',
obj='process')
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="hhqm" id="算法配置">算法配置</h3><p data-anchor-id="y5hw">指定训练相关的参数。</p><ul data-anchor-id="ytth">
<li>batch_size: 表示神经网络每次训练使用的数据为128条。</li>
<li>学习率(learning_rate): 每次迭代更新参数的步长,与网络的训练收敛速度有关系。</li>
<li>训练方法(learning_method): 代表训练过程在更新权重时采用动量优化器 <code>MomentumOptimizer</code> ,其中参数0.9代表动量优化每次保持前一次速度的0.9倍。</li>
<li><p>正则化(regularization): 是防止网络过拟合的一种手段,此处采用L2正则化。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">settings</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> batch_size</span><span class="pun">=</span><span class="lit">128</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> learning_rate</span><span class="pun">=</span><span class="lit">0.1</span><span class="pln"> </span><span class="pun">/</span><span class="pln"> </span><span class="lit">128.0</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> learning_method</span><span class="pun">=</span><span class="typ">MomentumOptimizer</span><span class="pun">(</span><span class="lit">0.9</span><span class="pun">),</span></code></li><li class="L4"><code class="language-python"><span class="pln"> regularization</span><span class="pun">=</span><span class="pln">L2Regularization</span><span class="pun">(</span><span class="lit">0.0005</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> </span><span class="lit">128</span><span class="pun">))</span></code></li></ol></pre></li>
</ul><div class="md-section-divider"></div><h3 data-anchor-id="23bp" id="模型结构">模型结构</h3><div class="md-section-divider"></div><h4 data-anchor-id="zdyh" id="整体结构">整体结构</h4><p data-anchor-id="6p9n">首先通过<code>data_layer</code>调用来获取数据,然后调用分类器(这里我们提供了三个不同的分类器)得到分类结果。训练时,对该结果计算其损失函数,分类问题常常选择交叉熵损失函数;而预测时直接输出该结果即可。</p><pre data-anchor-id="wjxj"><code>data_size = 1 * 28 * 28
label_size = 10
img = data_layer(name='pixel', size=data_size)
predict = softmax_regression(img) # Softmax回归
#predict = multilayer_perceptron(img) #多层感知器
#predict = convolutional_neural_network(img) #LeNet5卷积神经网络
if not is_predict:
lbl = data_layer(name="label", size=label_size)
inputs(img, lbl)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
</code></pre><div class="md-section-divider"></div><h4 data-anchor-id="iue9" id="softmax回归">Softmax回归</h4><p data-anchor-id="p6mv">只通过一层简单的以softmax为激活函数的全连接层,就可以得到分类的结果。</p><pre data-anchor-id="nop8"><code>def softmax_regression(img):
predict = fc_layer(input=img, size=10, act=SoftmaxActivation())
return predict
</code></pre><div class="md-section-divider"></div><h4 data-anchor-id="obqh" id="多层感知器">多层感知器</h4><p data-anchor-id="z8sk">下面代码实现了一个含有两个隐藏层(即全连接层)的多层感知器。其中两个隐藏层的激活函数均采用ReLU,输出层的激活函数用Softmax。</p><pre data-anchor-id="wyha"><code>def multilayer_perceptron(img):
# 第一个全连接层,激活函数为ReLU
hidden1 = fc_layer(input=img, size=128, act=ReluActivation())
# 第二个全连接层,激活函数为ReLU
hidden2 = fc_layer(input=hidden1, size=64, act=ReluActivation())
# 以softmax为激活函数的全连接输出层,输出层的大小必须为数字的个数10
predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
return predict
</code></pre><div class="md-section-divider"></div><h4 data-anchor-id="eiz3" id="卷积神经网络lenet-5">卷积神经网络LeNet-5</h4><p data-anchor-id="w1da">以下为LeNet-5的网络结构:输入的二维图像,首先经过两次卷积层到池化层,再经过全连接层,最后使用以softmax为激活函数的全连接层作为输出层。</p><pre data-anchor-id="8abp"><code>def convolutional_neural_network(img):
# 第一个卷积-池化层
conv_pool_1 = simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=TanhActivation())
# 第二个卷积-池化层
conv_pool_2 = simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=TanhActivation())
# 全连接层
fc1 = fc_layer(input=conv_pool_2, size=128, act=TanhActivation())
# 以softmax为激活函数的全连接输出层,输出层的大小必须为数字的个数10
predict = fc_layer(input=fc1, size=10, act=SoftmaxActivation())
return predict
</code></pre><div class="md-section-divider"></div><h2 data-anchor-id="lotz" id="训练模型">训练模型</h2><div class="md-section-divider"></div><h3 data-anchor-id="125p" id="训练命令及日志">训练命令及日志</h3><ol data-anchor-id="gks8">
<li><p>通过配置训练脚本 <code>train.sh</code> 来执行训练过程:</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">config</span><span class="pun">=</span><span class="pln">mnist_model</span><span class="pun">.</span><span class="pln">py </span><span class="com"># 在mnist_model.py中可以选择网络</span></code></li><li class="L1"><code class="language-bash"><span class="pln">output</span><span class="pun">=./</span><span class="pln">softmax_mnist_model </span></code></li><li class="L2"><code class="language-bash"><span class="pln">log</span><span class="pun">=</span><span class="pln">softmax_train</span><span class="pun">.</span><span class="pln">log </span></code></li><li class="L3"><code class="language-bash"></code></li><li class="L4"><code class="language-bash"><span class="pln">paddle train \</span></code></li><li class="L5"><code class="language-bash"><span class="pun">--</span><span class="pln">config</span><span class="pun">=</span><span class="pln">$config \ </span><span class="com"># 网络配置的脚本</span></code></li><li class="L6"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">dot_period</span><span class="pun">=</span><span class="lit">10</span><span class="pln"> \ </span><span class="com"># 每训练 `dot_period` 个批次后打印一个 `.`</span></code></li><li class="L7"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">log_period</span><span class="pun">=</span><span class="lit">100</span><span class="pln"> \ </span><span class="com"># 每隔多少batch打印一次日志</span></code></li><li class="L8"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">test_all_data_in_one_period</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \ </span><span class="com"># 每次测试是否用所有的数据</span></code></li><li class="L9"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">use_gpu</span><span 
class="pun">=</span><span class="lit">0</span><span class="pln"> \ </span><span class="com"># 是否使用GPU</span></code></li><li class="L0"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">trainer_count</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \ </span><span class="com"># 使用CPU或GPU的个数</span></code></li><li class="L1"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">num_passes</span><span class="pun">=</span><span class="lit">100</span><span class="pln"> \ </span><span class="com"># 训练进行的轮数(每次训练使用完所有数据为1轮)</span></code></li><li class="L2"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">save_dir</span><span class="pun">=</span><span class="pln">$output \ </span><span class="com"># 模型存储的位置</span></code></li><li class="L3"><code class="language-bash"><span class="lit">2</span><span class="pun">&gt;&amp;</span><span class="lit">1</span><span class="pln"> </span><span class="pun">|</span><span class="pln"> tee $log</span></code></li><li class="L4"><code class="language-bash"></code></li><li class="L5"><code class="language-bash"><span class="pln">python </span><span class="pun">-</span><span class="pln">m paddle</span><span class="pun">.</span><span class="pln">utils</span><span class="pun">.</span><span class="pln">plotcurve </span><span class="pun">-</span><span class="pln">i $log </span><span class="pun">&gt;</span><span class="pln"> plot</span><span class="pun">.</span><span class="pln">png</span></code></li></ol></pre>
<p>配置好参数之后,执行脚本 <code>./train.sh</code> 训练日志类似如下所示:</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code><span class="pln">I0117 </span><span class="lit">12</span><span class="pun">:</span><span class="lit">52</span><span class="pun">:</span><span class="lit">29.628617</span><span class="pln"> </span><span class="lit">4538</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">100</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">12800</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">2.63996</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">2.63996</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.241172</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.241172</span><span class="pln"> </span></code></li><li class="L1"><code><span class="pun">.........</span></code></li><li class="L2"><code><span class="pln">I0117 </span><span class="lit">12</span><span class="pun">:</span><span class="lit">52</span><span class="pun">:</span><span class="lit">29.768741</span><span class="pln"> </span><span class="lit">4538</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span 
class="lit">200</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">25600</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">1.74027</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.840582</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.185234</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.129297</span><span class="pln"> </span></code></li><li class="L3"><code><span class="pun">.........</span></code></li><li class="L4"><code><span class="pln">I0117 </span><span class="lit">12</span><span class="pun">:</span><span class="lit">52</span><span class="pun">:</span><span class="lit">29.916970</span><span class="pln"> </span><span class="lit">4538</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">300</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">38400</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">1.42119</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.783026</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.167786</span><span class="pln"> </span><span class="typ">CurrentEval</span><span 
class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.132891</span><span class="pln"> </span></code></li><li class="L5"><code><span class="pun">.........</span></code></li><li class="L6"><code><span class="pln">I0117 </span><span class="lit">12</span><span class="pun">:</span><span class="lit">52</span><span class="pun">:</span><span class="lit">30.061213</span><span class="pln"> </span><span class="lit">4538</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">400</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">51200</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">1.23965</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.695054</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.160039</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.136797</span><span class="pln"> </span></code></li><li class="L7"><code><span class="pun">......</span><span class="pln">I0117 </span><span class="lit">12</span><span class="pun">:</span><span class="lit">52</span><span class="pun">:</span><span class="lit">30.223270</span><span class="pln"> </span><span class="lit">4538</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span 
class="lit">181</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Pass</span><span class="pun">=</span><span class="lit">0</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">469</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">60000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">1.1628</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.156233</span><span class="pln"> </span></code></li><li class="L8"><code><span class="pln">I0117 </span><span class="lit">12</span><span class="pun">:</span><span class="lit">52</span><span class="pun">:</span><span class="lit">30.366894</span><span class="pln"> </span><span class="lit">4538</span><span class="pln"> </span><span class="typ">Tester</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">109</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Test</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">10000</span><span class="pln"> cost</span><span class="pun">=</span><span class="lit">0.50777</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.0978</span><span class="pln"> </span></code></li></ol></pre></li>
<li><p>用脚本 <code>plot_cost.py</code> 可以画出训练过程中的误差变化曲线:</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python plot_cost</span><span class="pun">.</span><span class="pln">py softmax_train</span><span class="pun">.</span><span class="pln">log </span></code></li></ol></pre></li>
<li><p>用脚本 <code>evaluate.py</code> 可以选出最佳训练的模型:</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python evaluate</span><span class="pun">.</span><span class="pln">py softmax_train</span><span class="pun">.</span><span class="pln">log</span></code></li></ol></pre></li>
</ol><div class="md-section-divider"></div><h3 data-anchor-id="5qjw" id="softmax回归的训练结果">softmax回归的训练结果</h3><p align="center" data-anchor-id="j38j">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/softmax_train_log.png" alt="softmax回归的误差曲线图" width="400"><br>
图7. softmax回归的误差曲线图<br>
</p><p data-anchor-id="2jhq">评估模型结果如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="45oz"><ol class="linenums"><li class="L0"><code class="language-text"><span class="typ">Best</span><span class="pln"> </span><span class="kwd">pass</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">00013</span><span class="pun">,</span><span class="pln"> testing </span><span class="typ">Avgcost</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">0.484447</span></code></li><li class="L1"><code class="language-text"><span class="typ">The</span><span class="pln"> classification accuracy </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">90.01</span><span class="pun">%</span></code></li></ol></pre><div class="md-section-divider"></div><p align="center" data-anchor-id="p1pu">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/mlp_train_log.png" alt="多层感知器的误差曲线图" width="400"><br>
图8. 多层感知器的误差曲线图
</p><p data-anchor-id="eza7">评估模型结果如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="hk46"><ol class="linenums"><li class="L0"><code class="language-text"><span class="typ">Best</span><span class="pln"> </span><span class="kwd">pass</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">00085</span><span class="pun">,</span><span class="pln"> testing </span><span class="typ">Avgcost</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">0.164746</span></code></li><li class="L1"><code class="language-text"><span class="typ">The</span><span class="pln"> classification accuracy </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">94.95</span><span class="pun">%</span></code></li></ol></pre><div class="md-section-divider"></div><p align="center" data-anchor-id="4hu1">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recognize_digits/image/cnn_train_log.png" alt="卷积神经网络的误差曲线图" width="400"><br>
图9. 卷积神经网络的误差曲线图
</p><p data-anchor-id="fh5w">评估模型结果如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="u3f4"><ol class="linenums"><li class="L0"><code class="language-text"><span class="typ">Best</span><span class="pln"> </span><span class="kwd">pass</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">00076</span><span class="pun">,</span><span class="pln"> testing </span><span class="typ">Avgcost</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">0.0244684</span></code></li><li class="L1"><code class="language-text"><span class="typ">The</span><span class="pln"> classification accuracy </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">99.20</span><span class="pun">%</span></code></li></ol></pre><p data-anchor-id="77la">从评估结果可以看到,卷积神经网络的最好分类准确率达到惊人的99.20%。说明对于图像问题而言,卷积神经网络能够比一般的全连接网络达到更好的识别效果,而这与卷积层具有局部连接和共享权重的特性是分不开的。同时,从图9中可以看到,卷积神经网络在很早的时候就能达到很好的效果,说明其收敛速度非常快。</p><div class="md-section-divider"></div><h2 data-anchor-id="iks3" id="应用模型">应用模型</h2><div class="md-section-divider"></div><h3 data-anchor-id="amus" id="预测命令与结果">预测命令与结果</h3><p data-anchor-id="gx0z">脚本 <code>predict.py</code> 可以对训练好的模型进行预测,例如softmax回归中:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="ahcm"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python predict</span><span class="pun">.</span><span class="pln">py </span><span class="pun">-</span><span class="pln">c softmax_mnist</span><span class="pun">.</span><span class="pln">py </span><span class="pun">-</span><span class="pln">d data</span><span class="pun">/</span><span class="pln">raw_data</span><span class="pun">/</span><span class="pln"> </span><span class="pun">-</span><span class="pln">m softmax_mnist_model</span><span class="pun">/</span><span class="pln">pass</span><span 
class="pun">-</span><span class="lit">00047</span></code></li></ol></pre><ul data-anchor-id="ekfu">
<li>-c 指定模型的结构</li>
<li>-d 指定需要预测的数据源,这里用测试数据集进行预测</li>
<li>-m 指定模型的参数,这里用之前训练效果最好的模型进行预测</li>
</ul><p data-anchor-id="csv6">根据提示,输入需要预测的图片序号,分类器能够给出各个数字的生成概率、预测的结果(取最大生成概率对应的数字)和实际的标签。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="o1mt"><ol class="linenums"><li class="L0"><code><span class="typ">Input</span><span class="pln"> image_id </span><span class="pun">[</span><span class="lit">0</span><span class="pun">~</span><span class="lit">9999</span><span class="pun">]:</span><span class="pln"> </span><span class="lit">3</span></code></li><li class="L1"><code><span class="typ">Predicted</span><span class="pln"> probability of each digit</span><span class="pun">:</span></code></li><li class="L2"><code><span class="pun">[[</span><span class="pln"> </span><span class="lit">1.00000000e+00</span><span class="pln"> </span><span class="lit">1.60381094e-28</span><span class="pln"> </span><span class="lit">1.60381094e-28</span><span class="pln"> </span><span class="lit">1.60381094e-28</span></code></li><li class="L3"><code><span class="pln"> </span><span class="lit">1.60381094e-28</span><span class="pln"> </span><span class="lit">1.60381094e-28</span><span class="pln"> </span><span class="lit">1.60381094e-28</span><span class="pln"> </span><span class="lit">1.60381094e-28</span></code></li><li class="L4"><code><span class="pln"> </span><span class="lit">1.60381094e-28</span><span class="pln"> </span><span class="lit">1.60381094e-28</span><span class="pun">]]</span></code></li><li class="L5"><code><span class="typ">Predict</span><span class="pln"> </span><span class="typ">Number</span><span class="pun">:</span><span class="pln"> </span><span class="lit">0</span><span class="pln"> </span></code></li><li class="L6"><code><span class="typ">Actual</span><span class="pln"> </span><span class="typ">Number</span><span class="pun">:</span><span class="pln"> </span><span class="lit">0</span></code></li></ol></pre><p data-anchor-id="35uw">从结果看出,该分类器接近100%地认为第3张图片上面的数字为0,而实际标签给出的类也确实如此。</p><div 
class="md-section-divider"></div><h2 data-anchor-id="znuu" id="总结">总结</h2><p data-anchor-id="q20y">本教程的softmax回归、多层感知器和卷积神经网络是最基础的深度学习模型,后续章节中复杂的神经网络都是从它们衍生出来的,因此这几个模型对之后的学习大有裨益。同时,我们也观察到从最简单的softmax回归变换到稍复杂的卷积神经网络的时候,MNIST数据集上的识别准确率有了大幅度的提升,原因是卷积层具有局部连接和共享权重的特性。在之后学习新模型的时候,希望大家也要深入到新模型相比原模型带来效果提升的关键之处。此外,本教程还介绍了PaddlePaddle模型搭建的基本流程,从dataprovider的编写、网络层的构建,到最后的训练和预测。对这个流程熟悉以后,大家就可以用自己的数据,定义自己的网络模型,并完成自己的训练和预测任务了。</p><div class="md-section-divider"></div><h2 data-anchor-id="1zy4" id="参考文献">参考文献</h2><ol data-anchor-id="0fud">
<li>LeCun, Yann, Léon Bottou, Yoshua Bengio, and Patrick Haffner. <a href="http://ieeexplore.ieee.org/abstract/document/726791/" target="_blank">"Gradient-based learning applied to document recognition."</a> Proceedings of the IEEE 86, no. 11 (1998): 2278-2324.</li>
<li>Wejéus, Samuel. <a href="http://www.diva-portal.org/smash/record.jsf?pid=diva2%3A753279&amp;dswid=-434" target="_blank">"A Neural Network Approach to Arbitrary Symbol Recognition on Modern Smartphones."</a> (2014).</li>
<li>Decoste, Dennis, and Bernhard Schölkopf. <a href="http://link.springer.com/article/10.1023/A:1012454411458" target="_blank">"Training invariant support vector machines."</a> Machine learning 46, no. 1-3 (2002): 161-190.</li>
<li>Simard, Patrice Y., David Steinkraus, and John C. Platt. <a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.160.8494&amp;rep=rep1&amp;type=pdf" target="_blank">"Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis."</a> In ICDAR, vol. 3, pp. 958-962. 2003.</li>
<li>Salakhutdinov, Ruslan, and Geoffrey E. Hinton. <a href="http://www.jmlr.org/proceedings/papers/v2/salakhutdinov07a/salakhutdinov07a.pdf" target="_blank">"Learning a Nonlinear Embedding by Preserving Class Neighbourhood Structure."</a> In AISTATS, vol. 11. 2007.</li>
<li>Cireşan, Dan Claudiu, Ueli Meier, Luca Maria Gambardella, and Jürgen Schmidhuber. <a href="http://www.mitpressjournals.org/doi/abs/10.1162/NECO_a_00052" target="_blank">"Deep, big, simple neural nets for handwritten digit recognition."</a> Neural computation 22, no. 12 (2010): 3207-3220.</li>
<li>Deng, Li, Michael L. Seltzer, Dong Yu, Alex Acero, Abdel-rahman Mohamed, and Geoffrey E. Hinton. <a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.185.1908&amp;rep=rep1&amp;type=pdf" target="_blank">"Binary coding of speech spectrograms using a deep auto-encoder."</a> In Interspeech, pp. 1692-1695. 2010.</li>
<li>Kégl, Balázs, and Róbert Busa-Fekete. <a href="http://dl.acm.org/citation.cfm?id=1553439" target="_blank">"Boosting products of base classifiers."</a> In Proceedings of the 26th Annual International Conference on Machine Learning, pp. 497-504. ACM, 2009.</li>
<li>Rosenblatt, Frank. <a href="http://psycnet.apa.org/journals/rev/65/6/386/" target="_blank">"The perceptron: A probabilistic model for information storage and organization in the brain."</a> Psychological review 65, no. 6 (1958): 386.</li>
<li>Bishop, Christopher M. <a href="http://s3.amazonaws.com/academia.edu.documents/30428242/bg0137.pdf?AWSAccessKeyId=AKIAJ56TQJRTWSMTNPEA&amp;Expires=1484816640&amp;Signature=85Ad6%2Fca8T82pmHzxaSXermovIA%3D&amp;response-content-disposition=inline%3B%20filename%3DPattern_recognition_and_machine_learning.pdf" target="_blank">"Pattern recognition."</a> Machine Learning 128 (2006): 1-58.</li>
</ol><p data-anchor-id="ohhf"><br> <br>
<img src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="知识共享许可协议"></p><p data-anchor-id="qbw8">本教程由<a href="http://book.paddlepaddle.org" target="_blank">PaddlePaddle</a>创作,采用<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">知识共享 署名-非商业性使用-相同方式共享 4.0 国际 许可协议</a>进行许可。</p></div>
</body>
</html>
......@@ -484,6 +484,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
<!DOCTYPE html>
<html class="theme theme-white" lang="zh-Hans">
<head>
<meta charset="utf-8">
<title>个性化推荐</title>
<link href="https://www.zybuluo.com/static/assets/template-theme-white.css" rel="stylesheet" media="screen">
<style type="text/css">
#wmd-preview h1 {
color: #0077bb; /* 将标题改为蓝色 */
}</style>
</head>
<body class="theme theme-white">
<div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMATHI-6B" stroke-width="1" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path><path id="MJMATHI-55" stroke-width="1" d="M107 637Q73 637 71 641Q70 643 70 649Q70 673 81 682Q83 683 98 683Q139 681 234 681Q268 681 297 681T342 682T362 682Q378 682 378 672Q378 670 376 658Q371 641 366 638H364Q362 638 359 638T352 638T343 637T334 637Q295 636 284 634T266 623Q265 621 238 518T184 302T154 169Q152 155 152 140Q152 86 183 55T269 24Q336 24 403 69T501 205L552 406Q599 598 599 606Q599 633 535 637Q511 637 511 648Q511 650 513 660Q517 676 519 679T529 683Q532 683 561 682T645 680Q696 680 723 681T752 682Q767 682 767 672Q767 650 759 642Q756 637 737 637Q666 633 648 597Q646 592 598 404Q557 235 548 205Q515 105 433 42T263 -22Q171 -22 116 34T60 167V183Q60 201 115 421Q164 622 164 628Q164 635 107 637Z"></path><path id="MJMATHI-3C9" stroke-width="1" d="M495 384Q495 406 514 424T555 443Q574 443 589 425T604 364Q604 334 592 278T555 
155T483 38T377 -11Q297 -11 267 66Q266 68 260 61Q201 -11 125 -11Q15 -11 15 139Q15 230 56 325T123 434Q135 441 147 436Q160 429 160 418Q160 406 140 379T94 306T62 208Q61 202 61 187Q61 124 85 100T143 76Q201 76 245 129L253 137V156Q258 297 317 297Q348 297 348 261Q348 243 338 213T318 158L308 135Q309 133 310 129T318 115T334 97T358 83T393 76Q456 76 501 148T546 274Q546 305 533 325T508 357T495 384Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-50" stroke-width="1" d="M287 628Q287 635 230 637Q206 637 199 638T192 648Q192 649 194 659Q200 679 203 681T397 683Q587 682 600 680Q664 669 707 631T751 530Q751 453 685 389Q616 321 507 303Q500 302 402 301H307L277 182Q247 66 247 59Q247 55 248 54T255 50T272 48T305 46H336Q342 37 342 35Q342 19 335 5Q330 0 319 0Q316 0 282 1T182 2Q120 2 87 2T51 1Q33 1 33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM645 554Q645 567 643 575T634 597T609 619T560 635Q553 636 480 637Q463 637 445 637T416 636T404 636Q391 635 386 627Q384 621 367 550T332 412T314 344Q314 342 395 342H407H430Q542 342 590 392Q617 419 631 471T645 554Z"></path><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 
359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path id="MJMAIN-7C" stroke-width="1" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path id="MJMATHI-75" stroke-width="1" d="M21 287Q21 295 30 318T55 370T99 420T158 442Q204 442 227 417T250 358Q250 340 216 246T182 105Q182 62 196 45T238 27T291 44T328 78L339 95Q341 99 377 247Q407 367 413 387T427 416Q444 431 463 431Q480 431 488 421T496 402L420 84Q419 79 419 68Q419 43 426 35T447 26Q469 29 482 57T512 145Q514 153 532 153Q551 153 551 144Q550 139 549 130T540 98T523 55T498 17T462 -8Q454 -10 438 -10Q372 -10 347 46Q345 45 336 36T318 21T296 6T267 -6T233 -11Q189 -11 155 7Q103 38 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMATHI-65" stroke-width="1" d="M39 168Q39 225 58 272T107 350T174 402T244 433T307 442H310Q355 442 388 420T421 355Q421 265 310 237Q261 224 176 223Q139 223 138 221Q138 219 132 186T125 128Q125 81 146 54T209 26T302 45T394 111Q403 121 406 121Q410 121 419 112T429 98T420 82T390 55T344 24T281 -1T205 -11Q126 -11 83 42T39 168ZM373 353Q367 405 305 405Q272 405 244 391T199 357T170 316T154 280T149 261Q149 260 169 260Q282 260 327 284T373 353Z"></path><path id="MJMATHI-76" stroke-width="1" d="M173 380Q173 405 154 405Q130 405 104 376T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Q21 294 29 316T53 368T97 419T160 441Q202 441 225 417T249 361Q249 344 246 335Q246 329 231 291T200 202T182 113Q182 86 187 69Q200 26 250 26Q287 26 319 60T369 
139T398 222T409 277Q409 300 401 317T383 343T365 361T357 383Q357 405 376 424T417 443Q436 443 451 425T467 367Q467 340 455 284T418 159T347 40T241 -11Q177 -11 139 22Q102 54 102 117Q102 148 110 181T151 298Q173 362 173 380Z"></path><path id="MJSZ1-2211" stroke-width="1" d="M61 748Q64 750 489 750H913L954 640Q965 609 976 579T993 533T999 516H979L959 517Q936 579 886 621T777 682Q724 700 655 705T436 710H319Q183 710 183 709Q186 706 348 484T511 259Q517 250 513 244L490 216Q466 188 420 134T330 27L149 -187Q149 -188 362 -188Q388 -188 436 -188T506 -189Q679 -189 778 -162T936 -43Q946 -27 959 6H999L913 -249L489 -250Q65 -250 62 -248Q56 -246 56 -239Q56 -234 118 -161Q186 -81 245 -11L428 206Q428 207 242 462L57 717L56 728Q56 744 61 748Z"></path><path id="MJMATHI-6A" stroke-width="1" d="M297 596Q297 627 318 644T361 661Q378 661 389 651T403 623Q403 595 384 576T340 557Q322 557 310 567T297 596ZM288 376Q288 405 262 405Q240 405 220 393T185 362T161 325T144 293L137 279Q135 278 121 278H107Q101 284 101 286T105 299Q126 348 164 391T252 441Q253 441 260 441T272 442Q296 441 316 432Q341 418 354 401T367 348V332L318 133Q267 -67 264 -75Q246 -125 194 -164T75 -204Q25 -204 7 -183T-12 -137Q-12 -110 7 -91T53 -71Q70 -71 82 -81T95 -112Q95 -148 63 -167Q69 -168 77 -168Q111 -168 139 -140T182 -74L193 -32Q204 11 219 72T251 197T278 308T289 365Q289 372 288 376Z"></path><path id="MJMAIN-2208" stroke-width="1" d="M84 250Q84 372 166 450T360 539Q361 539 377 539T419 540T469 540H568Q583 532 583 520Q583 511 570 501L466 500Q355 499 329 494Q280 482 242 458T183 409T147 354T129 306T124 272V270H568Q583 262 583 250T568 230H124V228Q124 207 134 177T167 112T231 48T328 7Q355 1 466 0H570Q583 -10 583 -20Q583 -32 568 -40H471Q464 -40 446 -40T417 -41Q262 -41 172 45Q84 127 84 250Z"></path><path id="MJMATHI-56" stroke-width="1" d="M52 648Q52 670 65 683H76Q118 680 181 680Q299 680 320 683H330Q336 677 336 674T334 656Q329 641 325 637H304Q282 635 274 635Q245 630 242 620Q242 618 271 369T301 118L374 235Q447 352 520 471T595 594Q599 601 599 609Q599 633 555 
637Q537 637 537 648Q537 649 539 661Q542 675 545 679T558 683Q560 683 570 683T604 682T668 681Q737 681 755 683H762Q769 676 769 672Q769 655 760 640Q757 637 743 637Q730 636 719 635T698 630T682 623T670 615T660 608T652 599T645 592L452 282Q272 -9 266 -16Q263 -18 259 -21L241 -22H234Q216 -22 216 -15Q213 -9 177 305Q139 623 138 626Q133 637 76 637H59Q52 642 52 648Z"></path></defs></svg></div><div id="wmd-preview" class="wmd-preview wmd-preview-full-reader"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="2hyd" id="个性化推荐">个性化推荐</h1><div class="md-section-divider"></div><h2 data-anchor-id="k5t4" id="背景介绍">背景介绍</h2><p data-anchor-id="6e0a">在网络技术不断发展和电子商务规模不断扩大的背景下,商品数量和种类快速增长,用户需要花费大量时间才能找到自己想买的商品,这就是信息超载问题。为了解决这个难题,推荐系统(Recommender System)应运而生。</p><p data-anchor-id="3wu6">个性化推荐系统是信息过滤系统(Information Filtering System)的子集,它可以用在很多领域,如电影、音乐、电商和 Feed 流推荐等。推荐系统通过分析、挖掘用户行为,发现用户的个性化需求与兴趣特点,将用户可能感兴趣的信息或商品推荐给用户。与搜索引擎不同,推荐系统不需要用户准确地描述出自己的需求,而是根据分析历史行为建模,主动提供满足用户兴趣和需求的信息。</p><p data-anchor-id="npdl">传统的推荐系统方法主要有:</p><ul data-anchor-id="1obm">
<li>协同过滤推荐(Collaborative Filtering Recommendation):该方法收集分析用户历史行为、活动、偏好,计算一个用户与其他用户的相似度,利用目标用户的相似用户对商品评价的加权评价值,来预测目标用户对特定商品的喜好程度。优点是可以给用户推荐未浏览过的新产品;缺点是对于没有任何行为的新用户存在冷启动的问题,同时也存在用户与商品之间的交互数据不够多造成的稀疏问题,会导致模型难以找到相近用户。</li>
<li>基于内容过滤推荐[<a href="#参考文献">1</a>](Content-based Filtering Recommendation):该方法利用商品的内容描述,抽象出有意义的特征,通过计算用户的兴趣和商品描述之间的相似度,来给用户做推荐。优点是简单直接,不需要依据其他用户对商品的评价,而是通过商品属性进行商品相似度度量,从而推荐给用户所感兴趣商品的相似商品;缺点是对于没有任何行为的新用户同样存在冷启动的问题。</li>
<li>组合推荐[<a href="#参考文献">2</a>](Hybrid Recommendation):运用不同的输入和技术共同进行推荐,以弥补各自推荐技术的缺点。</li>
</ul><p data-anchor-id="o51b">其中协同过滤是应用最广泛的技术之一,它又可以分为多个子类:基于用户 (User-Based)的推荐[<a href="#参考文献">3</a>] 、基于物品(Item-Based)的推荐[<a href="#参考文献">4</a>]、基于社交网络关系(Social-Based)的推荐[<a href="#参考文献">5</a>]、基于模型(Model-based)的推荐等。1994年明尼苏达大学推出的GroupLens系统[<a href="#参考文献">3</a>]一般被认为是推荐系统成为一个相对独立的研究方向的标志。该系统首次提出了基于协同过滤来完成推荐任务的思想,此后,基于该模型的协同过滤推荐引领了推荐系统十几年的发展方向。</p><p data-anchor-id="n78e">深度学习具有优秀的自动提取特征的能力,能够学习多层次的抽象特征表示,并对异质或跨域的内容信息进行学习,可以一定程度上处理推荐系统冷启动问题[<a href="#参考文献">6</a>]。本教程主要介绍个性化推荐的深度学习模型,以及如何使用PaddlePaddle实现模型。</p><div class="md-section-divider"></div><h2 data-anchor-id="z9ab" id="效果展示">效果展示</h2><p data-anchor-id="077s">我们使用包含用户信息、电影信息与电影评分的数据集作为个性化推荐的应用场景。当我们训练好模型后,只需要输入对应的用户ID和电影ID,就可以得出一个匹配的分数(范围[1,5],分数越高视为兴趣越大),然后根据所有电影的推荐得分排序,推荐给用户可能感兴趣的电影。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="nuqj"><ol class="linenums"><li class="L0"><code><span class="typ">Input</span><span class="pln"> movie_id</span><span class="pun">:</span><span class="pln"> </span><span class="lit">1962</span></code></li><li class="L1"><code><span class="typ">Input</span><span class="pln"> user_id</span><span class="pun">:</span><span class="pln"> </span><span class="lit">1</span></code></li><li class="L2"><code><span class="typ">Prediction</span><span class="pln"> </span><span class="typ">Score</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">4.25</span></code></li></ol></pre><div class="md-section-divider"></div><div class="md-section-divider"></div><p align="center" data-anchor-id="kpkf">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recommender_system/image/YouTube_Overview.png" alt="YouTube 推荐系统结构" width="700"><br>
图1. YouTube 推荐系统结构
</p><div class="md-section-divider"></div><h4 data-anchor-id="fvxx" id="候选生成网络candidate-generation-network">候选生成网络(Candidate Generation Network)</h4><p data-anchor-id="telc">候选生成网络将推荐问题建模为一个类别数极大的多类分类问题:对于一个YouTube用户,使用其观看历史(视频ID)、搜索词记录(search tokens)、人口学信息(如地理位置、用户登录设备)、二值特征(如性别,是否登录)和连续特征(如用户年龄)等,对视频库中所有视频进行多分类,得到每一类别的分类结果(即每一个视频的推荐概率),最终输出概率较高的几百个视频。</p><p data-anchor-id="78ec">首先,将观看历史及搜索词记录这类历史信息,映射为向量后取平均值得到定长表示;同时,输入人口学特征以优化新用户的推荐效果,并将二值特征和连续特征归一化处理到[0, 1]范围。接下来,将所有特征表示拼接为一个向量,并输入给非线性多层感知器(MLP,详见<a href="https://github.com/PaddlePaddle/book/blob/develop/recognize_digits/README.md" target="_blank">识别数字</a>教程)处理。最后,训练时将MLP的输出给softmax做分类,预测时计算用户的综合特征(MLP的输出)与所有视频的相似度,取得分最高的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-1-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 521.5 747.103370696049" style="width: 1.158ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B"></use></g></svg></span><script type="math/tex" id="MathJax-Element-1">k</script>个作为候选生成网络的筛选结果。图2显示了候选生成网络结构。</p><p align="center" data-anchor-id="ocmu">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recommender_system/image/Deep_candidate_generation_model_architecture.png" alt="候选生成网络结构" width="700"><br>
图2. 候选生成网络结构
</p><p data-anchor-id="m58q">对于一个用户<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-2-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 767.5 747.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-55"></use></g></svg></span><script type="math/tex" id="MathJax-Element-2">U</script>,预测此刻用户要观看的视频<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-3-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 622.5 497.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9"></use></g></svg></span><script type="math/tex" id="MathJax-Element-3">\omega</script>为视频<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-4-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-4">i</script>的概率公式为:</p><div class="md-section-divider"></div><p data-anchor-id="ft5n"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" 
role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-5-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1373.4070961948516 10600.151422196319 2540.448809973245" style="width: 24.672ex; height: 5.907ex; vertical-align: -2.78ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C9" x="1141" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="2041" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="3097" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="3443" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75" x="3721" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="4294" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="4961" y="0"></use><g transform="translate(6137,0)"><rect stroke="none" width="4342" height="60" x="0" y="220"></rect><g transform="translate(1392,676)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(466,362)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-76"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="686" y="-213"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75" x="829" y="0"></use></g></g><g transform="translate(60,-716)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJSZ1-2211"></use><g transform="translate(1056,-287)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2208" x="412" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="1080" y="0"></use></g><g transform="translate(2630,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(466,316)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-76"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="686" y="-213"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75" x="877" y="0"></use></g></g></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-5">P(\omega=i|u)=\frac{e^{v_{i}u}}{\sum_{j \in V}e^{v_{j}u}}</script></p><p data-anchor-id="cyym">其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-6-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 572.5 495.10337069604896" style="width: 1.274ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75"></use></g></svg></span><script type="math/tex" id="MathJax-Element-6">u</script>为用户<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-7-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 767.5 747.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-55"></use></g></svg></span><script type="math/tex" id="MathJax-Element-7">U</script>的特征表示,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-8-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 769.5 747.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56"></use></g></svg></span><script type="math/tex" id="MathJax-Element-8">V</script>为视频库集合,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-9-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 829.8053928999522 642.5886520702876" style="width: 1.969ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-76"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="686" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-9">v_i</script>为视频库中第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-10-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" 
viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-10">i</script>个视频的特征表示。<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-11-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 572.5 495.10337069604896" style="width: 1.274ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75"></use></g></svg></span><script type="math/tex" id="MathJax-Element-11">u</script><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-12-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 829.8053928999522 642.5886520702876" style="width: 1.969ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-76"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="686" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-12">v_i</script>为长度相等的向量,两者点积可以通过全连接层实现。</p><p data-anchor-id="o2y1">考虑到softmax分类的类别数非常多,为了保证一定的计算效率:1)训练阶段,使用负样本类别采样将实际计算的类别数缩小至数千;2)推荐(预测)阶段,忽略softmax的归一化计算(不影响结果),将类别打分问题简化为点积(dot product)空间中的最近邻(nearest neighbor)搜索问题,取与<span class="MathJax_Preview"></span><span class="MathJax_SVG" 
id="MathJax-Element-13-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 572.5 495.10337069604896" style="width: 1.274ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75"></use></g></svg></span><script type="math/tex" id="MathJax-Element-13">u</script>最近的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-14-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 521.5 747.103370696049" style="width: 1.158ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B"></use></g></svg></span><script type="math/tex" id="MathJax-Element-14">k</script>个视频作为生成的候选。</p><div class="md-section-divider"></div><h4 data-anchor-id="d1va" id="排序网络ranking-network">排序网络(Ranking Network)</h4><p data-anchor-id="veyo">排序网络的结构类似于候选生成网络,但是它的目标是对候选进行更细致的打分排序。和传统广告排序中的特征抽取方法类似,这里也构造了大量的用于视频排序的相关特征(如视频 ID、上次观看时间等)。这些特征的处理方式和候选生成网络类似,不同之处是排序网络的顶部是一个加权逻辑回归(weighted logistic regression),它对所有候选视频进行打分,从高到低排序后将分数较高的一些视频返回给用户。</p><div class="md-section-divider"></div><h3 data-anchor-id="oa0y" id="融合推荐模型">融合推荐模型</h3><p data-anchor-id="2h2q">在下文的电影推荐系统中:</p><ol data-anchor-id="bmth">
<li><p>首先,使用用户特征和电影特征作为神经网络的输入,其中:</p>
<ul><li><p>用户特征融合了四个属性信息,分别是用户ID、性别、职业和年龄。</p></li>
<li><p>电影特征融合了三个属性信息,分别是电影ID、电影类型ID和电影名称。</p></li></ul></li>
<li><p>对用户特征,将用户ID映射为维度大小为256的向量表示,输入全连接层,并对其他三个属性也做类似的处理。然后将四个属性的特征表示分别全连接并相加。</p></li>
<li><p>对电影特征,将电影ID以类似用户ID的方式进行处理,电影类型ID以向量的形式直接输入全连接层,电影名称用文本卷积神经网络(详见<a href="https://github.com/PaddlePaddle/book/blob/develop/understand_sentiment/README.md" target="_blank">第5章</a>)得到其定长向量表示。然后将三个属性的特征表示分别全连接并相加。</p></li>
<li><p>得到用户和电影的向量表示后,计算二者的余弦相似度作为推荐系统的打分。最后,用该相似度打分和用户真实打分的差异的平方作为该回归模型的损失函数。</p></li>
</ol><p align="center" data-anchor-id="frqc">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/recommender_system/image/rec_regression_network.png" alt="融合推荐模型" width="700"><br>
图3. 融合推荐模型
</p><div class="md-section-divider"></div><h2 data-anchor-id="qce9" id="数据准备">数据准备</h2><div class="md-section-divider"></div><h3 data-anchor-id="ubvc" id="数据介绍与下载">数据介绍与下载</h3><p data-anchor-id="f1od">我们以 <a href="http://files.grouplens.org/datasets/movielens/ml-1m.zip" target="_blank">MovieLens 百万数据集(ml-1m)</a>为例进行介绍。ml-1m 数据集包含了 6,000 位用户对 4,000 部电影的 1,000,000 条评价(评分范围 1~5 分,均为整数),由 GroupLens Research 实验室搜集整理。</p><p data-anchor-id="t7tr">您可以运行 <code>data/getdata.sh</code> 下载数据,如果数据获取成功,您将在目录<code>data/ml-1m</code>中看到下面的文件:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="gyqy"><ol class="linenums"><li class="L0"><code><span class="pln">movies</span><span class="pun">.</span><span class="pln">dat ratings</span><span class="pun">.</span><span class="pln">dat users</span><span class="pun">.</span><span class="pln">dat README </span></code></li></ol></pre><ul data-anchor-id="n72h">
<li>movies.dat:电影特征数据,格式为<code>电影ID::电影名称::电影类型</code></li>
<li>ratings.dat:评分数据,格式为<code>用户ID::电影ID::评分::时间戳</code></li>
<li>users.dat:用户特征数据,格式为<code>用户ID::性别::年龄::职业::邮编</code></li>
<li>README:数据集的详细描述</li>
</ul><div class="md-section-divider"></div><h3 data-anchor-id="lnbp" id="数据预处理">数据预处理</h3><p data-anchor-id="nn2k">首先安装 Python 第三方库(推荐使用 Virtualenv):</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="1pnx"><ol class="linenums"><li class="L0"><code class="language-shell"><span class="pln">pip install </span><span class="pun">-</span><span class="pln">r data</span><span class="pun">/</span><span class="pln">requirements</span><span class="pun">.</span><span class="pln">txt</span></code></li></ol></pre><p data-anchor-id="qyxa">其次在预处理<code>./preprocess.sh</code>过程中,我们将字段配置文件<code>data/config.json</code>转化为meta配置文件<code>meta_config.json</code>,并生成对应的meta文件<code>meta.bin</code>,以完成数据文件的序列化。然后再将<code>ratings.dat</code>分为训练集、测试集两部分,把它们的地址写入<code>train.list</code>和<code>test.list</code>。</p><p data-anchor-id="nom2">运行成功后目录<code>./data</code> 新增以下文件:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="77p2"><ol class="linenums"><li class="L0"><code><span class="pln">meta_config</span><span class="pun">.</span><span class="pln">json meta</span><span class="pun">.</span><span class="pln">bin ratings</span><span class="pun">.</span><span class="pln">dat</span><span class="pun">.</span><span class="pln">train ratings</span><span class="pun">.</span><span class="pln">dat</span><span class="pun">.</span><span class="pln">test train</span><span class="pun">.</span><span class="pln">list test</span><span class="pun">.</span><span class="pln">list</span></code></li></ol></pre><ul data-anchor-id="3j2u">
<li>meta.bin: meta文件是Python的pickle对象, 存储着电影和用户信息。</li>
<li>meta_config.json: meta配置文件,用来具体描述如何解析数据集中的每一个字段,由字段配置文件生成。</li>
<li>ratings.dat.train和ratings.dat.test: 训练集和测试集,训练集已经随机打乱。</li>
<li>train.list和test.list: 训练集和测试集的文件地址列表。</li>
</ul><div class="md-section-divider"></div><h3 data-anchor-id="fcqm" id="提供数据给-paddlepaddle">提供数据给 PaddlePaddle</h3><p data-anchor-id="50df">我们使用 Python 接口传递数据给系统,下面 <code>dataprovider.py</code> 给出了完整示例。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="qzcv"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer</span><span class="pun">.</span><span class="typ">PyDataProvider2</span><span class="pln"> </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L1"><code class="language-python"><span class="kwd">from</span><span class="pln"> common_utils </span><span class="kwd">import</span><span class="pln"> meta_to_header</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="kwd">def</span><span class="pln"> __list_to_map__</span><span class="pun">(</span><span class="pln">lst</span><span class="pun">):</span><span class="pln"> </span><span class="com"># 将list转为map</span></code></li><li class="L4"><code class="language-python"><span class="pln"> ret_val </span><span class="pun">=</span><span class="pln"> dict</span><span class="pun">()</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> each </span><span class="kwd">in</span><span class="pln"> lst</span><span class="pun">:</span></code></li><li class="L6"><code class="language-python"><span class="pln"> k</span><span class="pun">,</span><span class="pln"> v </span><span class="pun">=</span><span class="pln"> each</span></code></li><li class="L7"><code class="language-python"><span class="pln"> ret_val</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span><span class="pln"> </span><span 
class="pun">=</span><span class="pln"> v</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> ret_val</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> hook</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> meta</span><span class="pun">,</span><span class="pln"> </span><span class="pun">**</span><span class="pln">kwargs</span><span class="pun">):</span><span class="pln"> </span><span class="com"># 读取meta.bin</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 定义电影特征</span></code></li><li class="L2"><code class="language-python"><span class="pln"> movie_headers </span><span class="pun">=</span><span class="pln"> list</span><span class="pun">(</span><span class="pln">meta_to_header</span><span class="pun">(</span><span class="pln">meta</span><span class="pun">,</span><span class="pln"> </span><span class="str">'movie'</span><span class="pun">))</span></code></li><li class="L3"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">movie_names </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">h</span><span class="pun">[</span><span class="lit">0</span><span class="pun">]</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> h </span><span class="kwd">in</span><span class="pln"> movie_headers</span><span class="pun">]</span></code></li><li class="L4"><code class="language-python"><span class="pln"> headers </span><span class="pun">=</span><span class="pln"> movie_headers</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span 
class="com"># 定义用户特征</span></code></li><li class="L7"><code class="language-python"><span class="pln"> user_headers </span><span class="pun">=</span><span class="pln"> list</span><span class="pun">(</span><span class="pln">meta_to_header</span><span class="pun">(</span><span class="pln">meta</span><span class="pun">,</span><span class="pln"> </span><span class="str">'user'</span><span class="pun">))</span></code></li><li class="L8"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">user_names </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">h</span><span class="pun">[</span><span class="lit">0</span><span class="pun">]</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> h </span><span class="kwd">in</span><span class="pln"> user_headers</span><span class="pun">]</span></code></li><li class="L9"><code class="language-python"><span class="pln"> headers</span><span class="pun">.</span><span class="pln">extend</span><span class="pun">(</span><span class="pln">user_headers</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 加载评分信息</span></code></li><li class="L2"><code class="language-python"><span class="pln"> headers</span><span class="pun">.</span><span class="pln">append</span><span class="pun">((</span><span class="str">"rating"</span><span class="pun">,</span><span class="pln"> dense_vector</span><span class="pun">(</span><span class="lit">1</span><span class="pun">)))</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">input_types </span><span class="pun">=</span><span class="pln"> __list_to_map__</span><span class="pun">(</span><span 
class="pln">headers</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">meta </span><span class="pun">=</span><span class="pln"> meta</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="lit">@provider</span><span class="pun">(</span><span class="pln">init_hook</span><span class="pun">=</span><span class="pln">hook</span><span class="pun">,</span><span class="pln"> cache</span><span class="pun">=</span><span class="typ">CacheType</span><span class="pun">.</span><span class="pln">CACHE_PASS_IN_MEM</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"><span class="kwd">def</span><span class="pln"> process</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> filename</span><span class="pun">):</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">filename</span><span class="pun">,</span><span class="pln"> </span><span class="str">'r'</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> f</span><span class="pun">:</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> line </span><span class="kwd">in</span><span class="pln"> f</span><span class="pun">:</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 从评分文件中读取评分</span></code></li><li class="L2"><code class="language-python"><span class="pln"> user_id</span><span class="pun">,</span><span class="pln"> movie_id</span><span class="pun">,</span><span class="pln"> score </span><span class="pun">=</span><span 
class="pln"> map</span><span class="pun">(</span><span class="pln">int</span><span class="pun">,</span><span class="pln"> line</span><span class="pun">.</span><span class="pln">split</span><span class="pun">(</span><span class="str">'::'</span><span class="pun">)[:-</span><span class="lit">1</span><span class="pun">])</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="com"># 将评分平移到[-2, +2]范围内的整数</span></code></li><li class="L4"><code class="language-python"><span class="pln"> score </span><span class="pun">=</span><span class="pln"> float</span><span class="pun">(</span><span class="pln">score </span><span class="pun">-</span><span class="pln"> </span><span class="lit">3</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln"> movie_meta </span><span class="pun">=</span><span class="pln"> settings</span><span class="pun">.</span><span class="pln">meta</span><span class="pun">[</span><span class="str">'movie'</span><span class="pun">][</span><span class="pln">movie_id</span><span class="pun">]</span></code></li><li class="L7"><code class="language-python"><span class="pln"> user_meta </span><span class="pun">=</span><span class="pln"> settings</span><span class="pun">.</span><span class="pln">meta</span><span class="pun">[</span><span class="str">'user'</span><span class="pun">][</span><span class="pln">user_id</span><span class="pun">]</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="com"># 添加电影ID与电影特征</span></code></li><li class="L0"><code class="language-python"><span class="pln"> outputs </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[(</span><span class="str">'movie_id'</span><span class="pun">,</span><span class="pln"> movie_id </span><span class="pun">-</span><span 
class="pln"> </span><span class="lit">1</span><span class="pun">)]</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> i</span><span class="pun">,</span><span class="pln"> each_meta </span><span class="kwd">in</span><span class="pln"> enumerate</span><span class="pun">(</span><span class="pln">movie_meta</span><span class="pun">):</span></code></li><li class="L2"><code class="language-python"><span class="pln"> outputs</span><span class="pun">.</span><span class="pln">append</span><span class="pun">((</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">movie_names</span><span class="pun">[</span><span class="pln">i </span><span class="pun">+</span><span class="pln"> </span><span class="lit">1</span><span class="pun">],</span><span class="pln"> each_meta</span><span class="pun">))</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="com"># 添加用户ID与用户特征</span></code></li><li class="L5"><code class="language-python"><span class="pln"> outputs</span><span class="pun">.</span><span class="pln">append</span><span class="pun">((</span><span class="str">'user_id'</span><span class="pun">,</span><span class="pln"> user_id </span><span class="pun">-</span><span class="pln"> </span><span class="lit">1</span><span class="pun">))</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> i</span><span class="pun">,</span><span class="pln"> each_meta </span><span class="kwd">in</span><span class="pln"> enumerate</span><span class="pun">(</span><span class="pln">user_meta</span><span class="pun">):</span></code></li><li class="L7"><code class="language-python"><span class="pln"> outputs</span><span class="pun">.</span><span class="pln">append</span><span class="pun">((</span><span 
class="pln">settings</span><span class="pun">.</span><span class="pln">user_names</span><span class="pun">[</span><span class="pln">i </span><span class="pun">+</span><span class="pln"> </span><span class="lit">1</span><span class="pun">],</span><span class="pln"> each_meta</span><span class="pun">))</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="com"># 添加评分</span></code></li><li class="L0"><code class="language-python"><span class="pln"> outputs</span><span class="pun">.</span><span class="pln">append</span><span class="pun">((</span><span class="str">'rating'</span><span class="pun">,</span><span class="pln"> </span><span class="pun">[</span><span class="pln">score</span><span class="pun">]))</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 将数据返回给 paddle</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="kwd">yield</span><span class="pln"> __list_to_map__</span><span class="pun">(</span><span class="pln">outputs</span><span class="pun">)</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="kqr2" id="模型配置说明">模型配置说明</h2><div class="md-section-divider"></div><h3 data-anchor-id="lurj" id="数据定义">数据定义</h3><p data-anchor-id="irta">加载<code>meta.bin</code>文件并定义通过<code>define_py_data_sources2</code>从dataprovider中读入数据:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="szqz"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer_config_helpers </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code 
class="language-python"><span class="kwd">try</span><span class="pun">:</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="kwd">import</span><span class="pln"> cPickle </span><span class="kwd">as</span><span class="pln"> pickle</span></code></li><li class="L4"><code class="language-python"><span class="kwd">except</span><span class="pln"> </span><span class="typ">ImportError</span><span class="pun">:</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="kwd">import</span><span class="pln"> pickle</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="pln">is_predict </span><span class="pun">=</span><span class="pln"> get_config_arg</span><span class="pun">(</span><span class="str">'is_predict'</span><span class="pun">,</span><span class="pln"> bool</span><span class="pun">,</span><span class="pln"> </span><span class="kwd">False</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln">META_FILE </span><span class="pun">=</span><span class="pln"> </span><span class="str">'data/meta.bin'</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="com"># 加载 meta 文件</span></code></li><li class="L2"><code class="language-python"><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">META_FILE</span><span class="pun">,</span><span class="pln"> </span><span class="str">'rb'</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> f</span><span class="pun">:</span></code></li><li class="L3"><code class="language-python"><span class="pln"> meta </span><span class="pun">=</span><span class="pln"> pickle</span><span 
class="pun">.</span><span class="pln">load</span><span class="pun">(</span><span class="pln">f</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_predict</span><span class="pun">:</span></code></li><li class="L6"><code class="language-python"><span class="pln"> define_py_data_sources2</span><span class="pun">(</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="str">'data/train.list'</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="str">'data/test.list'</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln"> module</span><span class="pun">=</span><span class="str">'dataprovider'</span><span class="pun">,</span></code></li><li class="L0"><code class="language-python"><span class="pln"> obj</span><span class="pun">=</span><span class="str">'process'</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln"> args</span><span class="pun">={</span><span class="str">'meta'</span><span class="pun">:</span><span class="pln"> meta</span><span class="pun">})</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="uzsr" id="算法配置">算法配置</h3><p data-anchor-id="ek1r">这里我们设置了batch size、网络初始学习率和RMSProp自适应优化方法。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="bg79"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">settings</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> batch_size</span><span class="pun">=</span><span class="lit">1600</span><span class="pun">,</span><span class="pln"> 
learning_rate</span><span class="pun">=</span><span class="lit">1e-3</span><span class="pun">,</span><span class="pln"> learning_method</span><span class="pun">=</span><span class="typ">RMSPropOptimizer</span><span class="pun">())</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="km6i" id="模型结构">模型结构</h3><ol data-anchor-id="e135">
<li><p>定义数据输入和参数维度。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">movie_meta </span><span class="pun">=</span><span class="pln"> meta</span><span class="pun">[</span><span class="str">'movie'</span><span class="pun">][</span><span class="str">'__meta__'</span><span class="pun">][</span><span class="str">'raw_meta'</span><span class="pun">]</span></code></li><li class="L1"><code class="language-python"><span class="pln">user_meta </span><span class="pun">=</span><span class="pln"> meta</span><span class="pun">[</span><span class="str">'user'</span><span class="pun">][</span><span class="str">'__meta__'</span><span class="pun">][</span><span class="str">'raw_meta'</span><span class="pun">]</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">movie_id </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">'movie_id'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">movie_meta</span><span class="pun">[</span><span class="lit">0</span><span class="pun">][</span><span class="str">'max'</span><span class="pun">])</span><span class="pln"> </span><span class="com"># 电影ID</span></code></li><li class="L4"><code class="language-python"><span class="pln">title </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">'title'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">len</span><span class="pun">(</span><span class="pln">movie_meta</span><span class="pun">[</span><span class="lit">1</span><span class="pun">][</span><span class="str">'dict'</span><span class="pun">]))</span><span class="pln"> </span><span class="com"># 电影名称</span></code></li><li class="L5"><code class="language-python"><span 
class="pln">genres </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">'genres'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">len</span><span class="pun">(</span><span class="pln">movie_meta</span><span class="pun">[</span><span class="lit">2</span><span class="pun">][</span><span class="str">'dict'</span><span class="pun">]))</span><span class="pln"> </span><span class="com"># 电影类型</span></code></li><li class="L6"><code class="language-python"><span class="pln">user_id </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">'user_id'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">user_meta</span><span class="pun">[</span><span class="lit">0</span><span class="pun">][</span><span class="str">'max'</span><span class="pun">])</span><span class="pln"> </span><span class="com"># 用户ID</span></code></li><li class="L7"><code class="language-python"><span class="pln">gender </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">'gender'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">len</span><span class="pun">(</span><span class="pln">user_meta</span><span class="pun">[</span><span class="lit">1</span><span class="pun">][</span><span class="str">'dict'</span><span class="pun">]))</span><span class="pln"> </span><span class="com"># 用户性别</span></code></li><li class="L8"><code class="language-python"><span class="pln">age </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">'age'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">len</span><span class="pun">(</span><span class="pln">user_meta</span><span 
class="pun">[</span><span class="lit">2</span><span class="pun">][</span><span class="str">'dict'</span><span class="pun">]))</span><span class="pln"> </span><span class="com"># 用户年龄</span></code></li><li class="L9"><code class="language-python"><span class="pln">occupation </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">'occupation'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">len</span><span class="pun">(</span><span class="pln">user_meta</span><span class="pun">[</span><span class="lit">3</span><span class="pun">][</span><span class="str">'dict'</span><span class="pun">]))</span><span class="pln"> </span><span class="com"># 用户职业</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln">embsize </span><span class="pun">=</span><span class="pln"> </span><span class="lit">256</span><span class="pln"> </span><span class="com"># 向量维度</span></code></li></ol></pre></li>
<li><p>构造“电影”特征。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="com"># 电影ID和电影类型分别映射到其对应的特征隐层(256维)。</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">movie_id_emb </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">movie_id</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="pln">movie_id_hidden </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">movie_id_emb</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln">genres_emb </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">genres</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="com"># 对于电影名称,一个ID序列表示的词语序列,在输入卷积层后,</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="com"># 
将得到每个时间窗口的特征(序列特征),然后通过在时间维度</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="com"># 降采样得到固定维度的特征,整个过程在text_conv_pool实现</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="pln">title_emb </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">title</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"><span class="pln">title_hidden </span><span class="pun">=</span><span class="pln"> text_conv_pool</span><span class="pun">(</span></code></li><li class="L9"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">title_emb</span><span class="pun">,</span><span class="pln"> context_len</span><span class="pun">=</span><span class="lit">5</span><span class="pun">,</span><span class="pln"> hidden_size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="com"># 将三个属性的特征表示分别全连接并相加,结果即是电影特征的最终表示</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln">movie_feature </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li class="L5"><code class="language-python"><span class="pln"> input</span><span class="pun">=[</span><span class="pln">movie_id_hidden</span><span class="pun">,</span><span 
class="pln"> title_hidden</span><span class="pun">,</span><span class="pln"> genres_emb</span><span class="pun">],</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li></ol></pre></li>
<li><p>构造“用户”特征。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="com"># 将用户ID,性别,职业,年龄四个属性分别映射到其特征隐层。</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">user_id_emb </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">user_id</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="pln">user_id_hidden </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">user_id_emb</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln">gender_emb </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">gender</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln">gender_hidden </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">gender_emb</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span 
class="pun">)</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln">age_emb </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">age</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln">age_hidden </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">age_emb</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="pln">occup_emb </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">occupation</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln">occup_hidden </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">occup_emb</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="com"># 
同样将这四个属性分别全连接并相加形成用户特征的最终表示。</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="pln">user_feature </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li class="L9"><code class="language-python"><span class="pln"> input</span><span class="pun">=[</span><span class="pln">user_id_hidden</span><span class="pun">,</span><span class="pln"> gender_hidden</span><span class="pun">,</span><span class="pln"> age_hidden</span><span class="pun">,</span><span class="pln"> occup_hidden</span><span class="pun">],</span></code></li><li class="L0"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">embsize</span><span class="pun">)</span></code></li></ol></pre></li>
<li><p>计算余弦相似度,定义损失函数和网络输出。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">similarity </span><span class="pun">=</span><span class="pln"> cos_sim</span><span class="pun">(</span><span class="pln">a</span><span class="pun">=</span><span class="pln">movie_feature</span><span class="pun">,</span><span class="pln"> b</span><span class="pun">=</span><span class="pln">user_feature</span><span class="pun">,</span><span class="pln"> scale</span><span class="pun">=</span><span class="lit">2</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="com"># 训练时,采用regression_cost作为损失函数计算回归误差代价,并作为网络的输出。</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="com"># 预测时,网络的输出即为余弦相似度。</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_predict</span><span class="pun">:</span></code></li><li class="L9"><code class="language-python"><span class="pln"> lbl</span><span class="pun">=</span><span class="pln">data_layer</span><span class="pun">(</span><span class="str">'rating'</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="lit">1</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln"> cost</span><span class="pun">=</span><span class="pln">regression_cost</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">similarity</span><span class="pun">,</span><span class="pln"> label</span><span class="pun">=</span><span 
class="pln">lbl</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">cost</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="kwd">else</span><span class="pun">:</span></code></li><li class="L3"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">similarity</span><span class="pun">)</span></code></li></ol></pre></li>
</ol><div class="md-section-divider"></div><h2 data-anchor-id="4oba" id="训练模型">训练模型</h2><p data-anchor-id="egsb">执行<code>sh train.sh</code> 开始训练模型,将日志写入文件 <code>log.txt</code> 并打印在屏幕上。其中指定了总共需要执行 50 个pass。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="vmzb"><ol class="linenums"><li class="L0"><code class="language-shell"><span class="kwd">set</span><span class="pln"> </span><span class="pun">-</span><span class="pln">e</span></code></li><li class="L1"><code class="language-shell"><span class="pln">paddle train \</span></code></li><li class="L2"><code class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">config</span><span class="pun">=</span><span class="pln">trainer_config</span><span class="pun">.</span><span class="pln">py \ </span><span class="com"># 神经网络配置文件</span></code></li><li class="L3"><code class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">save_dir</span><span class="pun">=./</span><span class="pln">output \ </span><span class="com"># 模型保存路径</span></code></li><li class="L4"><code class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">use_gpu</span><span class="pun">=</span><span class="kwd">false</span><span class="pln"> \ </span><span class="com"># 是否使用GPU(默认不使用)</span></code></li><li class="L5"><code class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">trainer_count</span><span class="pun">=</span><span class="lit">4</span><span class="pln"> \ </span><span class="com"># 一台机器上面的线程数量</span></code></li><li class="L6"><code class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">test_all_data_in_one_period</span><span class="pun">=</span><span class="kwd">true</span><span class="pln"> \ </span><span class="com"># 每个测试周期测试一次所有数据,否则每个测试周期测试batch_size个batch数据</span></code></li><li class="L7"><code 
class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">log_period</span><span class="pun">=</span><span class="lit">100</span><span class="pln"> \ </span><span class="com"># 训练log_period个batch后打印日志</span></code></li><li class="L8"><code class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">dot_period</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \ </span><span class="com"># 每训练dot_period个batch后打印一个"."</span></code></li><li class="L9"><code class="language-shell"><span class="pln"> </span><span class="pun">--</span><span class="pln">num_passes</span><span class="pun">=</span><span class="lit">50</span><span class="pln"> </span><span class="lit">2</span><span class="pun">&gt;&amp;</span><span class="lit">1</span><span class="pln"> </span><span class="pun">|</span><span class="pln"> tee </span><span class="str">'log.txt'</span></code></li></ol></pre><p data-anchor-id="umsb">成功的输出类似如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="feiy"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">01</span><span class="pun">:</span><span class="lit">48.585651</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">100</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">160000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.600042</span><span class="pln"> </span><span class="typ">CurrentCost</span><span 
class="pun">=</span><span class="lit">0.600042</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span></code></li><li class="L1"><code class="language-bash"><span class="pun">...................................................................................................</span></code></li><li class="L2"><code class="language-bash"><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">02</span><span class="pun">:</span><span class="lit">53.821918</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">200</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">320000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.602855</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.605668</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span></code></li><li class="L3"><code class="language-bash"><span class="pun">...................................................................................................</span></code></li><li class="L4"><code class="language-bash"><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">03</span><span class="pun">:</span><span class="lit">58.937922</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span 
class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">300</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">480000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.605199</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.609887</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span></code></li><li class="L5"><code class="language-bash"><span class="pun">...................................................................................................</span></code></li><li class="L6"><code class="language-bash"><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">05</span><span class="pun">:</span><span class="lit">04.083251</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">400</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">640000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.608693</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.619175</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> </span><span 
class="typ">CurrentEval</span><span class="pun">:</span></code></li><li class="L7"><code class="language-bash"><span class="pun">...................................................................................................</span></code></li><li class="L8"><code class="language-bash"><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">06</span><span class="pun">:</span><span class="lit">09.155859</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">165</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">500</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">800000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.613273</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.631591</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span></code></li><li class="L9"><code class="language-bash"><span class="pun">.................................................................</span><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">06</span><span class="pun">:</span><span class="lit">51.109654</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">181</span><span class="pun">]</span></code></li><li class="L0"><code class="language-bash"><span class="pln"> </span><span 
class="typ">Pass</span><span class="pun">=</span><span class="lit">49</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">565</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">902826</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.614772</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span></code></li><li class="L1"><code class="language-bash"><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">07</span><span class="pun">:</span><span class="lit">04.205142</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span class="typ">Tester</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">115</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Test</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">97383</span><span class="pln"> cost</span><span class="pun">=</span><span class="lit">0.721995</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span></code></li><li class="L2"><code class="language-bash"><span class="pln">I0117 </span><span class="lit">01</span><span class="pun">:</span><span class="lit">07</span><span class="pun">:</span><span class="lit">04.205281</span><span class="pln"> </span><span class="lit">9998</span><span class="pln"> </span><span class="typ">GradientMachine</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">113</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Saving</span><span class="pln"> parameters to </span><span class="pun">./</span><span class="pln">output</span><span class="pun">/</span><span class="pln">pass</span><span class="pun">-</span><span 
class="lit">00049</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="vc6d" id="应用模型">应用模型</h2><p data-anchor-id="uhz0">在训练了几轮以后,您可以对模型进行评估。运行以下命令,可以通过选择最小训练误差的一轮参数得到最好轮次的模型。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="525h"><ol class="linenums"><li class="L0"><code class="language-shell"><span class="pun">./</span><span class="pln">evaluate</span><span class="pun">.</span><span class="pln">py log</span><span class="pun">.</span><span class="pln">txt</span></code></li></ol></pre><p data-anchor-id="6awi">您将看到:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="u3is"><ol class="linenums"><li class="L0"><code class="language-shell"><span class="typ">Best</span><span class="pln"> </span><span class="kwd">pass</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">00036</span><span class="pun">,</span><span class="pln"> error </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">0.719281</span><span class="pun">,</span><span class="pln"> which means predict </span><span class="kwd">get</span><span class="pln"> error </span><span class="kwd">as</span><span class="pln"> </span><span class="lit">0.424052</span></code></li><li class="L1"><code class="language-shell"><span class="pln">evaluating </span><span class="kwd">from</span><span class="pln"> </span><span class="kwd">pass</span><span class="pln"> output</span><span class="pun">/</span><span class="kwd">pass</span><span class="pun">-</span><span class="lit">00036</span></code></li></ol></pre><p data-anchor-id="6svk">预测任何用户对于任何一部电影评价的命令如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="xsrb"><ol class="linenums"><li class="L0"><code class="language-shell"><span class="pln">python prediction</span><span class="pun">.</span><span class="pln">py 
</span><span class="str">'output/pass-00036/'</span></code></li></ol></pre><p data-anchor-id="a4x3">预测程序将读取用户的输入,然后输出预测分数。您会看到如下命令行界面:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="4ltb"><ol class="linenums"><li class="L0"><code><span class="typ">Input</span><span class="pln"> movie_id</span><span class="pun">:</span><span class="pln"> </span><span class="lit">1962</span></code></li><li class="L1"><code><span class="typ">Input</span><span class="pln"> user_id</span><span class="pun">:</span><span class="pln"> </span><span class="lit">1</span></code></li><li class="L2"><code><span class="typ">Prediction</span><span class="pln"> </span><span class="typ">Score</span><span class="pln"> </span><span class="kwd">is</span><span class="pln"> </span><span class="lit">4.25</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="sayc" id="总结">总结</h2><p data-anchor-id="9u3u">本章介绍了传统的推荐系统方法和YouTube的深度神经网络推荐系统,并以电影推荐为例,使用PaddlePaddle训练了一个个性化推荐神经网络模型。推荐系统几乎涵盖了电商系统、社交网络、广告推荐、搜索引擎等领域的方方面面,而在图像处理、自然语言处理等领域已经发挥重要作用的深度学习技术,也将会在推荐系统领域大放异彩。</p><div class="md-section-divider"></div><h2 data-anchor-id="w59q" id="参考文献">参考文献</h2><ol data-anchor-id="rikd">
<li><a href="https://en.wikipedia.org/wiki/Peter_Brusilovsky" target="_blank">Peter Brusilovsky</a> (2007). <em>The Adaptive Web</em>. p. 325.</li>
<li>Robin Burke, <a href="http://www.dcs.warwick.ac.uk/~acristea/courses/CS411/2010/Book%20-%20The%20Adaptive%20Web/HybridWebRecommenderSystems.pdf" target="_blank">Hybrid Web Recommender Systems</a>, pp. 377-408, The Adaptive Web, Peter Brusilovsky, Alfred Kobsa, Wolfgang Nejdl (Eds.), Springer-Verlag, Berlin, Germany, Lecture Notes in Computer Science, Vol. 4321, May 2007, ISBN 978-3-540-72078-2.</li>
<li>P. Resnick, N. Iacovou, et al. “<a href="http://ccs.mit.edu/papers/CCSWP165.html" target="_blank">GroupLens: An Open Architecture for Collaborative Filtering of Netnews</a>”, Proceedings of ACM Conference on Computer Supported Cooperative Work, CSCW 1994. pp. 175-186.</li>
<li>Sarwar, Badrul, et al. "<a href="http://files.grouplens.org/papers/www10_sarwar.pdf" target="_blank">Item-based collaborative filtering recommendation algorithms.</a>" <em>Proceedings of the 10th international conference on World Wide Web</em>. ACM, 2001.</li>
<li>Kautz, Henry, Bart Selman, and Mehul Shah. "<a href="http://www.cs.cornell.edu/selman/papers/pdf/97.cacm.refweb.pdf" target="_blank">Referral Web: combining social networks and collaborative filtering.</a>" Communications of the ACM 40.3 (1997): 63-65.</li>
<li>Yuan, Jianbo, et al. <a href="https://arxiv.org/pdf/1611.05480v1.pdf" target="_blank">"Solving Cold-Start Problem in Large-scale Recommendation Engines: A Deep Learning Approach."</a> <em>arXiv preprint arXiv:1611.05480</em> (2016).</li>
<li>Covington P, Adams J, Sargin E. <a href="https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/45530.pdf" target="_blank">Deep neural networks for YouTube recommendations</a>[C]//Proceedings of the 10th ACM Conference on Recommender Systems. ACM, 2016: 191-198.</li>
</ol><p data-anchor-id="e7gv"><br> <br>
<img src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="知识共享许可协议"></p><p data-anchor-id="166j">本教程由<a href="http://book.paddlepaddle.org" target="_blank">PaddlePaddle</a>创作,采用<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">知识共享 署名-非商业性使用-相同方式共享 4.0 国际 许可协议</a>进行许可。</p></div>
</body>
</html>
......@@ -434,6 +434,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
......@@ -52,6 +52,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
......@@ -52,6 +52,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
<!DOCTYPE html>
<html class="theme theme-white">
<head>
<meta charset="utf-8">
<title>情感分析</title>
<link href="https://www.zybuluo.com/static/assets/template-theme-white.css" rel="stylesheet" media="screen">
<style type="text/css">
#wmd-preview h1 {
color: #0077bb; /* 将标题改为蓝色 */
}</style>
</head>
<body class="theme theme-white">
<div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMATHI-6E" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-78" stroke-width="1" d="M52 289Q59 331 106 386T222 442Q257 442 286 424T329 379Q371 442 430 442Q467 442 494 420T522 361Q522 332 508 314T481 292T458 288Q439 288 427 299T415 328Q415 374 465 391Q454 404 425 404Q412 404 406 402Q368 386 350 336Q290 115 290 78Q290 50 306 38T341 26Q378 26 414 59T463 140Q466 150 469 151T485 153H489Q504 153 504 145Q504 144 502 134Q486 77 440 33T333 -11Q263 -11 227 52Q186 -10 133 -10H127Q78 -10 57 16T35 71Q35 103 54 123T99 143Q142 143 
142 101Q142 81 130 66T107 46T94 41L91 40Q91 39 97 36T113 29T132 26Q168 26 194 71Q203 87 217 139T245 247T261 313Q266 340 266 352Q266 380 251 392T217 404Q177 404 142 372T93 290Q91 281 88 280T72 278H58Q52 284 52 289Z"></path><path id="MJMAIN-2208" stroke-width="1" d="M84 250Q84 372 166 450T360 539Q361 539 377 539T419 540T469 540H568Q583 532 583 520Q583 511 570 501L466 500Q355 499 329 494Q280 482 242 458T183 409T147 354T129 306T124 272V270H568Q583 262 583 250T568 230H124V228Q124 207 134 177T167 112T231 48T328 7Q355 1 466 0H570Q583 -10 583 -20Q583 -32 568 -40H471Q464 -40 446 -40T417 -41Q262 -41 172 45Q84 127 84 250Z"></path><path id="MJAMS-52" stroke-width="1" d="M17 665Q17 672 28 683H221Q415 681 439 677Q461 673 481 667T516 654T544 639T566 623T584 607T597 592T607 578T614 565T618 554L621 548Q626 530 626 497Q626 447 613 419Q578 348 473 326L455 321Q462 310 473 292T517 226T578 141T637 72T686 35Q705 30 705 16Q705 7 693 -1H510Q503 6 404 159L306 310H268V183Q270 67 271 59Q274 42 291 38Q295 37 319 35Q344 35 353 28Q362 17 353 3L346 -1H28Q16 5 16 16Q16 35 55 35Q96 38 101 52Q106 60 106 341T101 632Q95 645 55 648Q17 648 17 665ZM241 35Q238 42 237 45T235 78T233 163T233 337V621L237 635L244 648H133Q136 641 137 638T139 603T141 517T141 341Q141 131 140 89T134 37Q133 36 133 35H241ZM457 496Q457 540 449 570T425 615T400 634T377 643Q374 643 339 648Q300 648 281 635Q271 628 270 610T268 481V346H284Q327 346 375 352Q421 364 439 392T457 496ZM492 537T492 496T488 427T478 389T469 371T464 361Q464 360 465 360Q469 360 497 370Q593 400 593 495Q593 592 477 630L457 637L461 626Q474 611 488 561Q492 537 492 496ZM464 243Q411 317 410 317Q404 317 401 315Q384 315 370 312H346L526 35H619L606 50Q553 109 464 243Z"></path><path id="MJMATHI-6B" stroke-width="1" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 
369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path><path id="MJMATHI-68" stroke-width="1" d="M137 683Q138 683 209 688T282 694Q294 694 294 685Q294 674 258 534Q220 386 220 383Q220 381 227 388Q288 442 357 442Q411 442 444 415T478 336Q478 285 440 178T402 50Q403 36 407 31T422 26Q450 26 474 56T513 138Q516 149 519 151T535 153Q555 153 555 145Q555 144 551 130Q535 71 500 33Q466 -10 419 -10H414Q367 -10 346 17T325 74Q325 90 361 192T398 345Q398 404 354 404H349Q266 404 205 306L198 293L164 158Q132 28 127 16Q114 -11 83 -11Q69 -11 59 -2T48 16Q48 30 121 320L195 616Q195 629 188 632T149 637H128Q122 643 122 645T124 664Q129 683 137 683Z"></path><path id="MJMAIN-3A" stroke-width="1" d="M78 370Q78 394 95 412T138 430Q162 430 180 414T199 371Q199 346 182 328T139 310T96 327T78 370ZM78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60Z"></path><path id="MJMAIN-2B" stroke-width="1" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path id="MJMAIN-2212" stroke-width="1" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path><path id="MJMAIN-31" stroke-width="1" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path id="MJMAIN-2C" 
stroke-width="1" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path id="MJMAIN-2026" stroke-width="1" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60ZM525 60Q525 84 542 102T585 120Q609 120 627 104T646 61Q646 36 629 18T586 0T543 17T525 60ZM972 60Q972 84 989 102T1032 120Q1056 120 1074 104T1093 61Q1093 36 1076 18T1033 0T990 17T972 60Z"></path><path id="MJMATHI-77" stroke-width="1" d="M580 385Q580 406 599 424T641 443Q659 443 674 425T690 368Q690 339 671 253Q656 197 644 161T609 80T554 12T482 -11Q438 -11 404 5T355 48Q354 47 352 44Q311 -11 252 -11Q226 -11 202 -5T155 14T118 53T104 116Q104 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Q21 293 29 315T52 366T96 418T161 441Q204 441 227 416T250 358Q250 340 217 250T184 111Q184 65 205 46T258 26Q301 26 334 87L339 96V119Q339 122 339 128T340 136T341 143T342 152T345 165T348 182T354 206T362 238T373 281Q402 395 406 404Q419 431 449 431Q468 431 475 421T483 402Q483 389 454 274T422 142Q420 131 420 107V100Q420 85 423 71T442 42T487 26Q558 26 600 148Q609 171 620 213T632 273Q632 306 619 325T593 357T580 385Z"></path><path id="MJMATHI-63" stroke-width="1" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path 
id="MJMATHI-66" stroke-width="1" d="M118 -162Q120 -162 124 -164T135 -167T147 -168Q160 -168 171 -155T187 -126Q197 -99 221 27T267 267T289 382V385H242Q195 385 192 387Q188 390 188 397L195 425Q197 430 203 430T250 431Q298 431 298 432Q298 434 307 482T319 540Q356 705 465 705Q502 703 526 683T550 630Q550 594 529 578T487 561Q443 561 443 603Q443 622 454 636T478 657L487 662Q471 668 457 668Q445 668 434 658T419 630Q412 601 403 552T387 469T380 433Q380 431 435 431Q480 431 487 430T498 424Q499 420 496 407T491 391Q489 386 482 386T428 385H372L349 263Q301 15 282 -47Q255 -132 212 -173Q175 -205 139 -205Q107 -205 81 -186T55 -132Q55 -95 76 -78T118 -61Q162 -61 162 -103Q162 -122 151 -136T127 -157L118 -162Z"></path><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMAIN-22C5" stroke-width="1" d="M78 250Q78 274 95 292T138 310Q162 310 180 294T199 251Q199 226 182 208T139 190T96 207T78 250Z"></path><path id="MJMATHI-62" stroke-width="1" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMATHI-73" 
stroke-width="1" d="M131 289Q131 321 147 354T203 415T300 442Q362 442 390 415T419 355Q419 323 402 308T364 292Q351 292 340 300T328 326Q328 342 337 354T354 372T367 378Q368 378 368 379Q368 382 361 388T336 399T297 405Q249 405 227 379T204 326Q204 301 223 291T278 274T330 259Q396 230 396 163Q396 135 385 107T352 51T289 7T195 -10Q118 -10 86 19T53 87Q53 126 74 143T118 160Q133 160 146 151T160 120Q160 94 142 76T111 58Q109 57 108 57T107 55Q108 52 115 47T146 34T201 27Q237 27 263 38T301 66T318 97T323 122Q323 150 302 164T254 181T195 196T148 231Q131 256 131 289Z"></path><path id="MJMATHI-67" stroke-width="1" d="M311 43Q296 30 267 15T206 0Q143 0 105 45T66 160Q66 265 143 353T314 442Q361 442 401 394L404 398Q406 401 409 404T418 412T431 419T447 422Q461 422 470 413T480 394Q480 379 423 152T363 -80Q345 -134 286 -169T151 -205Q10 -205 10 -137Q10 -111 28 -91T74 -71Q89 -71 102 -80T116 -111Q116 -121 114 -130T107 -144T99 -154T92 -162L90 -164H91Q101 -167 151 -167Q189 -167 211 -155Q234 -144 254 -122T282 -75Q288 -56 298 -13Q311 35 311 43ZM384 328L380 339Q377 350 375 354T369 368T359 382T346 393T328 402T306 405Q262 405 221 352Q191 313 171 233T151 117Q151 38 213 38Q269 38 323 108L331 118L384 328Z"></path><path id="MJMATHI-6D" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T88 425T132 442T175 435T205 417T221 395T229 376L231 369Q231 367 232 367L243 378Q303 442 384 442Q401 442 415 440T441 433T460 423T475 411T485 398T493 385T497 373T500 364T502 357L510 367Q573 442 659 442Q713 442 746 415T780 336Q780 285 742 178T704 50Q705 36 709 31T724 26Q752 26 776 56T815 138Q818 149 821 151T837 153Q857 153 857 145Q857 144 853 130Q845 101 831 73T785 17T716 -10Q669 -10 648 17T627 73Q627 92 663 193T700 345Q700 404 656 404H651Q565 404 506 303L499 291L466 157Q433 26 428 16Q415 -11 385 -11Q372 -11 364 -4T353 8T350 18Q350 29 384 161L420 307Q423 322 423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 181Q151 335 151 342Q154 357 154 369Q154 405 129 
405Q107 405 92 377T69 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-6F" stroke-width="1" d="M201 -11Q126 -11 80 38T34 156Q34 221 64 279T146 380Q222 441 301 441Q333 441 341 440Q354 437 367 433T402 417T438 387T464 338T476 268Q476 161 390 75T201 -11ZM121 120Q121 70 147 48T206 26Q250 26 289 58T351 142Q360 163 374 216T388 308Q388 352 370 375Q346 405 306 405Q243 405 195 347Q158 303 140 230T121 120Z"></path><path id="MJMATHI-64" stroke-width="1" d="M366 683Q367 683 438 688T511 694Q523 694 523 686Q523 679 450 384T375 83T374 68Q374 26 402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487H491Q506 153 506 145Q506 140 503 129Q490 79 473 48T445 8T417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157Q33 205 53 255T101 341Q148 398 195 420T280 442Q336 442 364 400Q369 394 369 396Q370 400 396 505T424 616Q424 629 417 632T378 637H357Q351 643 351 645T353 664Q358 683 366 683ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path id="MJMAIN-32" stroke-width="1" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path id="MJMAIN-5B" stroke-width="1" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path id="MJMAIN-5D" stroke-width="1" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path><path id="MJMAIN-5E" stroke-width="1" d="M112 560L249 694L257 686Q387 562 387 560L361 531Q359 532 303 581L250 627L195 580Q182 569 169 557T148 538L140 532Q138 530 125 546L112 560Z"></path><path 
id="MJMATHI-61" stroke-width="1" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"></path><path id="MJMATHI-74" stroke-width="1" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path id="MJMAIN-30" stroke-width="1" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path id="MJMATHI-3C3" stroke-width="1" d="M184 -11Q116 -11 74 34T31 147Q31 247 104 333T274 430Q275 431 414 431H552Q553 430 555 429T559 427T562 425T565 422T567 420T569 416T570 412T571 407T572 401Q572 357 507 357Q500 357 490 357T476 358H416L421 348Q439 310 439 263Q439 153 359 71T184 -11ZM361 278Q361 358 276 358Q152 358 115 184Q114 180 114 178Q106 141 106 117Q106 67 131 47T188 26Q242 26 287 73Q316 103 334 153T356 233T361 278Z"></path><path id="MJMATHI-57" stroke-width="1" d="M436 683Q450 683 486 682T553 
680Q604 680 638 681T677 682Q695 682 695 674Q695 670 692 659Q687 641 683 639T661 637Q636 636 621 632T600 624T597 615Q597 603 613 377T629 138L631 141Q633 144 637 151T649 170T666 200T690 241T720 295T759 362Q863 546 877 572T892 604Q892 619 873 628T831 637Q817 637 817 647Q817 650 819 660Q823 676 825 679T839 682Q842 682 856 682T895 682T949 681Q1015 681 1034 683Q1048 683 1048 672Q1048 666 1045 655T1038 640T1028 637Q1006 637 988 631T958 617T939 600T927 584L923 578L754 282Q586 -14 585 -15Q579 -22 561 -22Q546 -22 542 -17Q539 -14 523 229T506 480L494 462Q472 425 366 239Q222 -13 220 -15T215 -19Q210 -22 197 -22Q178 -22 176 -15Q176 -12 154 304T131 622Q129 631 121 633T82 637H58Q51 644 51 648Q52 671 64 683H76Q118 680 176 680Q301 680 313 683H323Q329 677 329 674T327 656Q322 641 318 637H297Q236 634 232 620Q262 160 266 136L501 550L499 587Q496 629 489 632Q483 636 447 637Q428 637 422 639T416 648Q416 650 418 660Q419 664 420 669T421 676T424 680T428 682T436 683Z"></path><path id="MJMATHI-46" stroke-width="1" d="M48 1Q31 1 31 11Q31 13 34 25Q38 41 42 43T65 46Q92 46 125 49Q139 52 144 61Q146 66 215 342T285 622Q285 629 281 629Q273 632 228 634H197Q191 640 191 642T193 659Q197 676 203 680H742Q749 676 749 669Q749 664 736 557T722 447Q720 440 702 440H690Q683 445 683 453Q683 454 686 477T689 530Q689 560 682 579T663 610T626 626T575 633T503 634H480Q398 633 393 631Q388 629 386 623Q385 622 352 492L320 363H375Q378 363 398 363T426 364T448 367T472 374T489 386Q502 398 511 419T524 457T529 475Q532 480 548 480H560Q567 475 567 470Q567 467 536 339T502 207Q500 200 482 200H470Q463 206 463 212Q463 215 468 234T473 274Q473 303 453 310T364 317H309L277 190Q245 66 245 60Q245 46 334 46H359Q365 40 365 39T363 19Q359 6 353 0H336Q295 2 185 2Q120 2 86 2T48 1Z"></path><path id="MJMAIN-2299" stroke-width="1" d="M56 250Q56 394 156 488T384 583Q530 583 626 485T722 250Q722 110 625 14T390 -83Q249 -83 153 14T56 250ZM682 250Q682 322 649 387T546 497T381 542Q272 542 184 459T95 250Q95 132 178 45T389 -42Q515 -42 598 45T682 250ZM311 250Q311 
285 332 304T375 328Q376 328 382 328T392 329Q424 326 445 305T466 250Q466 217 445 195T389 172Q354 172 333 195T311 250Z"></path><path id="MJMATHI-52" stroke-width="1" d="M230 637Q203 637 198 638T193 649Q193 676 204 682Q206 683 378 683Q550 682 564 680Q620 672 658 652T712 606T733 563T739 529Q739 484 710 445T643 385T576 351T538 338L545 333Q612 295 612 223Q612 212 607 162T602 80V71Q602 53 603 43T614 25T640 16Q668 16 686 38T712 85Q717 99 720 102T735 105Q755 105 755 93Q755 75 731 36Q693 -21 641 -21H632Q571 -21 531 4T487 82Q487 109 502 166T517 239Q517 290 474 313Q459 320 449 321T378 323H309L277 193Q244 61 244 59Q244 55 245 54T252 50T269 48T302 46H333Q339 38 339 37T336 19Q332 6 326 0H311Q275 2 180 2Q146 2 117 2T71 2T50 1Q33 1 33 10Q33 12 36 24Q41 43 46 45Q50 46 61 46H67Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628Q287 635 230 637ZM630 554Q630 586 609 608T523 636Q521 636 500 636T462 637H440Q393 637 386 627Q385 624 352 494T319 361Q319 360 388 360Q466 361 492 367Q556 377 592 426Q608 449 619 486T630 554Z"></path><path id="MJMATHI-65" stroke-width="1" d="M39 168Q39 225 58 272T107 350T174 402T244 433T307 442H310Q355 442 388 420T421 355Q421 265 310 237Q261 224 176 223Q139 223 138 221Q138 219 132 186T125 128Q125 81 146 54T209 26T302 45T394 111Q403 121 406 121Q410 121 419 112T429 98T420 82T390 55T344 24T281 -1T205 -11Q126 -11 83 42T39 168ZM373 353Q367 405 305 405Q272 405 244 391T199 357T170 316T154 280T149 261Q149 260 169 260Q282 260 327 284T373 353Z"></path><path id="MJMATHI-72" stroke-width="1" d="M21 287Q22 290 23 295T28 317T38 348T53 381T73 411T99 433T132 442Q161 442 183 430T214 408T225 388Q227 382 228 382T236 389Q284 441 347 441H350Q398 441 422 400Q430 381 430 363Q430 333 417 315T391 292T366 288Q346 288 334 299T322 328Q322 376 378 392Q356 405 342 405Q286 405 239 331Q229 315 224 298T190 165Q156 25 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 114 189T154 366Q154 405 128 405Q107 405 92 377T68 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-75" 
stroke-width="1" d="M21 287Q21 295 30 318T55 370T99 420T158 442Q204 442 227 417T250 358Q250 340 216 246T182 105Q182 62 196 45T238 27T291 44T328 78L339 95Q341 99 377 247Q407 367 413 387T427 416Q444 431 463 431Q480 431 488 421T496 402L420 84Q419 79 419 68Q419 43 426 35T447 26Q469 29 482 57T512 145Q514 153 532 153Q551 153 551 144Q550 139 549 130T540 98T523 55T498 17T462 -8Q454 -10 438 -10Q372 -10 347 46Q345 45 336 36T318 21T296 6T267 -6T233 -11Q189 -11 155 7Q103 38 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path></defs></svg></div><div id="wmd-preview" class="wmd-preview wmd-preview-full-reader"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="woa1" id="情感分析">情感分析</h1><div class="md-section-divider"></div><h2 data-anchor-id="6sb6" id="背景介绍">背景介绍</h2><p data-anchor-id="p24n">在自然语言处理中,情感分析一般是指判断一段文本所表达的情绪状态。其中,一段文本可以是一个句子,一个段落或一个文档。情绪状态可以是两类,如(正面,负面),(高兴,悲伤);也可以是三类,如(积极,消极,中性)等等。情感分析的应用场景十分广泛,如把用户在购物网站(亚马逊、天猫、淘宝等)、旅游网站、电影评论网站上发表的评论分成正面评论和负面评论;或为了分析用户对于某一产品的整体使用感受,抓取产品的用户评论并进行情感分析等等。表格1展示了对电影评论进行情感分析的例子:</p><table data-anchor-id="ydgg" class="table table-striped-white table-bordered">
<thead>
<tr>
<th>电影评论</th>
<th>类别</th>
</tr>
</thead>
<tbody><tr>
<td>在冯小刚这几年的电影里,算最好的一部的了</td>
<td>正面</td>
</tr>
<tr>
<td>很不好看,好像一个地方台的电视剧</td>
<td>负面</td>
</tr>
<tr>
<td>圆方镜头全程炫技,色调背景美则美矣,但剧情拖沓,口音不伦不类,一直努力却始终无法入戏</td>
<td>负面</td>
</tr>
<tr>
<td>剧情四星。但是圆镜视角加上婺源的风景整个非常有中国写意山水画的感觉,看得实在太舒服了。。</td>
<td>正面</td>
</tr>
</tbody></table><p data-anchor-id="vben">在自然语言处理中,情感分析属于典型的<strong>文本分类</strong>问题,即把需要进行情感分析的文本划分为其所属类别。文本分类涉及文本表示和分类方法两个问题。在深度学习的方法出现之前,主流的文本表示方法为词袋模型BOW(bag of words),话题模型等等;分类方法有SVM(support vector machine), LR(logistic regression)等等。 </p><p data-anchor-id="g3bc">对于一段文本,BOW表示会忽略其词顺序、语法和句法,将这段文本仅仅看做是一个词集合,因此BOW方法并不能充分表示文本的语义信息。例如,句子“这部电影糟糕透了”和“一个乏味,空洞,没有内涵的作品”在情感分析中具有很高的语义相似度,但是它们的BOW表示的相似度为0。又如,句子“一个空洞,没有内涵的作品”和“一个不空洞而且有内涵的作品”的BOW相似度很高,但实际上它们的意思很不一样。 </p><p data-anchor-id="9b8i">本章我们所要介绍的深度学习模型克服了BOW表示的上述缺陷,它在考虑词顺序的基础上把文本映射到低维度的语义空间,并且以端对端(end to end)的方式进行文本表示及分类,其性能相对于传统方法有显著的提升[<a href="#参考文献">1</a>]。</p><div class="md-section-divider"></div><h2 data-anchor-id="o6dd" id="模型概览">模型概览</h2><p data-anchor-id="pfp7">本章所使用的文本表示模型为卷积神经网络(Convolutional Neural Networks)和循环神经网络(Recurrent Neural Networks)及其扩展。下面依次介绍这几个模型。</p><div class="md-section-divider"></div><h3 data-anchor-id="3ho4" id="文本卷积神经网络cnn">文本卷积神经网络(CNN)</h3><p data-anchor-id="tb0x">卷积神经网络经常用来处理具有类似网格拓扑结构(grid-like topology)的数据。例如,图像可以视为二维网格的像素点,自然语言可以视为一维的词序列。卷积神经网络可以提取多种局部特征,并对其进行组合抽象得到更高级的特征表示。实验表明,卷积神经网络能高效地对图像及文本问题进行建模处理。 </p><p data-anchor-id="3mtf">卷积神经网络主要由卷积(convolution)和池化(pooling)操作构成,其应用及组合方式灵活多变,种类繁多。本小节我们以一种简单的文本分类卷积神经网络为例进行讲解[<a href="#参考文献">1</a>],如图1所示:</p><p align="center" data-anchor-id="jos4">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/understand_sentiment/image/text_cnn.png" alt="卷积神经网络文本分类模型结构示意图" width="700"><br>
图1. 卷积神经网络文本分类模型
</p><p data-anchor-id="514w">假设待处理句子的长度为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-1-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-1">n</script>,其中第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-2-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-2">i</script>个词的词向量(word embedding)为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-3-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -922.4713132022996 3331.117134844292 1101.0082799245627" style="width: 7.761ex; height: 2.548ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="809" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-2208" x="1194" y="0"></use><g transform="translate(2139,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJAMS-52"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="1021" y="581"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-3">x_i\in\mathbb{R}^k</script>,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-4-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 521.5 747.103370696049" style="width: 1.158ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B"></use></g></svg></span><script type="math/tex" id="MathJax-Element-4">k</script>为维度大小。 </p><p data-anchor-id="gvwb">首先,进行词向量的拼接操作:将每<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-5-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 576.5 747.103370696049" style="width: 1.39ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use></g></svg></span><script type="math/tex" id="MathJax-Element-5">h</script>个词拼接起来形成一个大小为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-6-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 576.5 747.103370696049" style="width: 1.39ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 
0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use></g></svg></span><script type="math/tex" id="MathJax-Element-6">h</script>的词窗口,记为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-7-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 3220.5592860057245 691.7932335345324" style="width: 7.529ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="624" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="969" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1747" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="2324" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="3103" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-7">x_{i:i+h-1}</script>,它表示词序列<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-8-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 8191.991680149484 691.7932335345324" style="width: 18.996ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="809" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="916" y="0"></use><g transform="translate(1361,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1124" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3183" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="3628" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4967" y="0"></use><g transform="translate(5412,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1124" y="0"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="1700" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2479" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-8">x_{i},x_{i+1},\ldots,x_{i+h-1}</script>的拼接,其中,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-9-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-9">i</script>表示词窗口中第一个词在整个句子中的位置,取值范围从<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-10-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 500.5 708.103370696049" style="width: 1.158ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use></g></svg></span><script type="math/tex" id="MathJax-Element-10">1</script>到<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-11-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 4123.388888888889 818.103370696049" style="width: 9.614ex; height: 1.853ex; vertical-align: -0.347ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 
0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="822" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1823" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="2622" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="3622" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-11">n-h+1</script>,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-12-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -922.4713132022996 6042.518087304109 1151.2128613888076" style="width: 14.015ex; height: 2.664ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="624" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="969" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1747" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="2324" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-31" x="3103" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2208" x="3498" y="0"></use><g transform="translate(4443,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJAMS-52"></use><g transform="translate(722,410)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="576" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-12">x_{i:i+h-1}\in\mathbb{R}^{hk}</script>。</p><p data-anchor-id="6eru">其次,进行卷积操作:把卷积核(kernel)<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-13-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -922.4713132022996 3538.458801298385 984.522998550324" style="width: 8.224ex; height: 2.317ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2208" x="994" y="0"></use><g transform="translate(1939,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJAMS-52"></use><g transform="translate(722,410)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="576" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-13">w\in\mathbb{R}^{hk}</script>应用于包含<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-14-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: 
inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 576.5 747.103370696049" style="width: 1.39ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use></g></svg></span><script type="math/tex" id="MathJax-Element-14">h</script>个词的窗口<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-15-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 3220.5592860057245 691.7932335345324" style="width: 7.529ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="624" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="969" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1747" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="2324" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="3103" y="0"></use></g></g></svg></span><script type="math/tex" 
id="MathJax-Element-15">x_{i:i+h-1}</script>,得到特征<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-16-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 9753.809123350122 1042.103370696049" style="width: 22.703ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="613" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1055" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="2111" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2662" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77" x="3051" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-22C5" x="3990" y="0"></use><g transform="translate(4491,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="624" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="969" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1747" y="0"></use><use 
transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="2324" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="3103" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="7934" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="8934" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="9364" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-16">c_i=f(w\cdot x_{i:i+h-1}+b)</script>,其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-17-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 2375.0555555555557 777.103370696049" style="width: 5.56ex; height: 1.853ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2208" x="707" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJAMS-52" x="1652" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-17">b\in\mathbb{R}</script>为偏置项(bias),<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-18-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 550.5 952.103370696049" style="width: 1.274ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-66"></use></g></svg></span><script type="math/tex" id="MathJax-Element-18">f</script>为非线性激活函数,如<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-19-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 3528.5 941.103370696049" style="width: 8.224ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="469" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67" x="815" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="1295" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="2174" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="2659" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="3005" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-19">sigmoid</script>。将卷积核应用于句子中所有的词窗口<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-20-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 10422.206468011855 691.7932335345324" style="width: 24.208ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use 
transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="500" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="779" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1630" y="0"></use><g transform="translate(2076,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="500" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="779" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1355" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2134" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4611" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="5056" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6395" y="0"></use><g transform="translate(6841,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="600" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1379" y="0"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1955" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2734" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3A" x="3234" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="3513" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-20">{x_{1:h},x_{2:h+1},\ldots,x_{n-h+1:n}}</script>,产生一个特征图(feature map):</p><div class="md-section-divider"></div><p data-anchor-id="adaa"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-21-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -924.3828982726751 14806.032099907956 1195.4345836206996" style="width: 34.402ex; height: 2.78ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="711" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B" x="1767" y="0"></use><g transform="translate(2046,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="613" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2933" y="0"></use><g transform="translate(3378,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="613" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4266" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2026" x="4711" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6050" y="0"></use><g transform="translate(6495,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><g transform="translate(433,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="600" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1379" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1955" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2734" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="9316" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="9594" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="10039" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2208" x="10751" y="0"></use><g transform="translate(11696,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJAMS-52"></use><g transform="translate(722,412)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="600" y="0"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1379" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1955" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2734" y="0"></use></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-21">c=[c_1,c_2,\ldots,c_{n-h+1}], c \in \mathbb{R}^{n-h+1}</script></p><p data-anchor-id="vkpd">接下来,对特征图采用时间维度上的最大池化(max pooling over time)操作得到此卷积核对应的整句话的特征<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-22-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -702.8583648847041 556.1 735.9100502327285" style="width: 1.274ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="33" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5E" x="55" y="-13"></use></g></svg></span><script type="math/tex" id="MathJax-Element-22">\hat c</script>,它是特征图中所有元素的最大值:</p><div class="md-section-divider"></div><p data-anchor-id="od9l"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-23-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 5083.155555555555 1042.103370696049" style="width: 11.815ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="33" 
y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5E" x="55" y="-13"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="833" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="1890" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="2768" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="3298" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="3870" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="4260" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="4693" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-23">\hat c=max(c)</script></p><p data-anchor-id="tmo2">在实际应用中,我们会使用多个卷积核来处理句子,窗口大小相同的卷积核堆叠起来形成一个矩阵(上文中的单个卷积核参数<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-24-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 716.5 496.10337069604896" style="width: 1.622ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use></g></svg></span><script type="math/tex" id="MathJax-Element-24">w</script>相当于矩阵的某一行),这样可以更高效的完成运算。另外,我们也可使用窗口大小不同的卷积核来处理句子(图1作为示意画了四个卷积核,不同颜色表示不同大小的卷积核操作)。 </p><p data-anchor-id="pbpk">最后,将所有卷积核得到的特征拼接起来即为文本的定长向量表示,对于文本分类问题,将其连接至softmax即构建出完整的模型。</p><p data-anchor-id="5gyc">对于一般的短文本分类问题,上文所述的简单的文本卷积网络即可达到很高的正确率[<a href="#参考文献">1</a>]。若想得到更抽象更高级的文本特征表示,可以构建深层文本卷积神经网络[<a href="#参考文献">2</a>,<a href="#参考文献">3</a>]。</p><div class="md-section-divider"></div><h3 data-anchor-id="76da" id="循环神经网络rnn">循环神经网络(RNN)</h3><p 
data-anchor-id="jix2">循环神经网络是一种能对序列数据进行精确建模的有力工具。实际上,循环神经网络的理论计算能力是图灵完备的[<a href="#参考文献">4</a>]。自然语言是一种典型的序列数据(词序列),近年来,循环神经网络及其变体(如long short term memory[<a href="#参考文献">5</a>]等)在自然语言处理的多个领域,如语言模型、句法解析、语义角色标注(或一般的序列标注)、语义表示、图文生成、对话、机器翻译等任务上均表现优异甚至成为目前效果最好的方法。</p><p align="center" data-anchor-id="5qsd">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/understand_sentiment/image/rnn.png" alt="循环神经网络按时间展开的示意图" width="400"><br>
图2. 循环神经网络按时间展开的示意图
</p><p data-anchor-id="rwui">循环神经网络按时间展开后如图2所示:在第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-25-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-25">t</script>时刻,网络读入第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-26-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-26">t</script>个输入<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-27-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 928.1191013989369 641.5886520702876" style="width: 2.201ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g></svg></span><script type="math/tex" 
id="MathJax-Element-27">x_t</script>(向量表示)及前一时刻隐层的状态值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-28-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 1836.5086745365313 893.5886520702876" style="width: 4.286ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-28">h_{t-1}</script>(向量表示,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-29-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 1030.406943983867 901.3668266633396" style="width: 2.432ex; height: 2.085ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="815" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-29">h_0</script>一般初始化为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-30-Frame" role="textbox" aria-readonly="true" style="font-size: 
100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 500.5 730.103370696049" style="width: 1.158ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30"></use></g></svg></span><script type="math/tex" id="MathJax-Element-30">0</script>向量),计算得出本时刻隐层的状态值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-31-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 932.1191013989369 893.5886520702876" style="width: 2.201ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="815" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-31">h_t</script>,重复这一步骤直至读完所有输入。如果将循环神经网络所表示的函数记为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-32-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 550.5 952.103370696049" style="width: 1.274ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use></g></svg></span><script type="math/tex" id="MathJax-Element-32">f</script>,则其公式可表示为:</p><div class="md-section-divider"></div><p data-anchor-id="ffs1"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" 
role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-33-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 19507.476147537123 1042.103370696049" style="width: 45.29ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="815" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1209" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="2266" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2816" y="0"></use><g transform="translate(3206,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4134" y="0"></use><g transform="translate(4579,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="6415" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="7083" 
y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C3" x="8139" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="8712" y="0"></use><g transform="translate(9101,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="572" y="0"></use></g></g><g transform="translate(10958,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="12108" y="0"></use><g transform="translate(13109,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="576" y="0"></use></g></g><g transform="translate(14969,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="17180" y="0"></use><g 
transform="translate(18180,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="19117" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-33">h_t=f(x_t,h_{t-1})=\sigma(W_{xh}x_t+W_{hh}h_{t-1}+b_h)</script></p><p data-anchor-id="863e">其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-34-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1856.9656915833432 882.5886520702876" style="width: 4.286ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="572" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-34">W_{xh}</script>是输入到隐层的矩阵参数,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-35-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1859.7941187080894 882.5886520702876" style="width: 4.286ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g 
transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="576" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-35">W_{hh}</script>是隐层到隐层的矩阵参数,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-36-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 937.1470593540447 893.5886520702876" style="width: 2.201ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="607" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-36">b_h</script>为隐层的偏置向量(bias)参数,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-37-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -452.0516853480245 572.5 484.10337069604896" style="width: 1.274ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C3"></use></g></svg></span><script type="math/tex" id="MathJax-Element-37">\sigma</script>为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-38-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 
-715.0516853480245 3528.5 941.103370696049" style="width: 8.224ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="469" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67" x="815" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="1295" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="2174" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="2659" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="3005" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-38">sigmoid</script>函数。 </p><p data-anchor-id="7ac9">在处理自然语言时,一般会先将词(one-hot表示)映射为其词向量(word embedding)表示,然后再作为循环神经网络每一时刻的输入<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-39-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 928.1191013989369 641.5886520702876" style="width: 2.201ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-39">x_t</script>。此外,可以根据实际需要的不同在循环神经网络的隐层上连接其它层。如,可以把一个循环神经网络的隐层输出连接至下一个循环神经网络的输入构建深层(deep or stacked)循环神经网络,或者提取最后一个时刻的隐层状态作为句子表示进而使用分类模型等等。 </p><div class="md-section-divider"></div><h3 data-anchor-id="opm4" 
id="长短期记忆网络lstm">长短期记忆网络(LSTM)</h3><p data-anchor-id="puym">对于较长的序列数据,循环神经网络的训练过程中容易出现梯度消失或爆炸现象[<a href="#参考文献">6</a>]。为了解决这一问题,Hochreiter S, Schmidhuber J. (1997)提出了LSTM(long short term memory[<a href="#参考文献">5</a>])。 </p><p data-anchor-id="ornj">相比于简单的循环神经网络,LSTM增加了记忆单元<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-40-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 433.5 496.10337069604896" style="width: 1.042ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use></g></svg></span><script type="math/tex" id="MathJax-Element-40">c</script>、输入门<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-41-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-41">i</script>、遗忘门<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-42-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 550.5 952.103370696049" style="width: 1.274ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-66"></use></g></svg></span><script type="math/tex" id="MathJax-Element-42">f</script>及输出门<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-43-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -462.0516853480245 485.5 494.10337069604896" style="width: 1.158ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F"></use></g></svg></span><script type="math/tex" id="MathJax-Element-43">o</script>。这些门及记忆单元组合起来大大提升了循环神经网络处理长序列数据的能力。若将基于LSTM的循环神经网络表示的函数记为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-44-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -701.0516853480245 749.5 722.103370696049" style="width: 1.737ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-46"></use></g></svg></span><script type="math/tex" id="MathJax-Element-44">F</script>,则其公式为:</p><div class="md-section-divider"></div><p data-anchor-id="5cdo"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-45-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 7004.469099556628 1042.103370696049" style="width: 16.216ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="815" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1209" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-46" x="2266" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="3015" y="0"></use><g transform="translate(3405,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4333" y="0"></use><g transform="translate(4778,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="6614" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-45"> h_t=F(x_t,h_{t-1})</script></p><p data-anchor-id="51da"><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-46-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -701.0516853480245 749.5 722.103370696049" style="width: 1.737ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" 
stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-46"></use></g></svg></span><script type="math/tex" id="MathJax-Element-46">F</script>由下列公式组合而成[<a href="#参考文献">7</a>]:</p><div class="md-section-divider"></div><p data-anchor-id="x9vt"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-47-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 39896.895752895754 6821.757555138181" style="width: 92.664ex; height: 15.869ex; vertical-align: -14.131ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><g transform="translate(11236,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="488" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="978" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C3" x="2035" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2607" y="0"></use><g transform="translate(2997,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="572" y="0"></use></g></g><g transform="translate(4690,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="5841" y="0"></use><g transform="translate(6841,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="576" y="0"></use></g></g><g transform="translate(8538,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="576" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1355" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="10749" y="0"></use><g transform="translate(11749,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="433" y="0"></use></g></g><g transform="translate(13345,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><g transform="translate(433,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="15260" y="0"></use><g transform="translate(16261,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="17035" y="0"></use></g><g transform="translate(10873,-1433)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="693" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1123" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C3" x="2180" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2752" y="0"></use><g transform="translate(3142,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="572" y="0"></use></g></g><g transform="translate(4980,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="6131" y="0"></use><g transform="translate(7131,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use 
transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="576" y="0"></use></g></g><g transform="translate(8973,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="576" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1355" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="11183" y="0"></use><g transform="translate(12184,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="433" y="0"></use></g></g><g transform="translate(13925,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><g transform="translate(433,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="15840" y="0"></use><g transform="translate(16841,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="607" y="-219"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="17760" y="0"></use></g><g transform="translate(9152,-2915)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="613" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1066" y="0"></use><g transform="translate(2123,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2299" x="3191" y="0"></use><g transform="translate(4192,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><g transform="translate(433,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="6107" y="0"></use><g transform="translate(7108,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="488" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2299" x="8032" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-74" x="9032" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="9394" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="9923" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="10524" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="11100" y="0"></use><g transform="translate(11490,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="572" y="0"></use></g></g><g transform="translate(13246,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="14396" y="0"></use><g transform="translate(15397,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="576" y="0"></use></g></g><g transform="translate(17155,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="576" y="0"></use><use 
transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1355" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="19366" y="0"></use><g transform="translate(20367,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="21203" y="0"></use></g><g transform="translate(11420,-4348)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="686" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1118" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3C3" x="2175" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="2747" y="0"></use><g transform="translate(3137,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="572" y="0"></use></g></g><g transform="translate(4929,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="6080" y="0"></use><g transform="translate(7080,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g 
transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="576" y="0"></use></g></g><g transform="translate(8876,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="576" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1355" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="11087" y="0"></use><g transform="translate(12087,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="433" y="0"></use></g></g><g transform="translate(13782,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="613" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="14793" y="0"></use><g transform="translate(15794,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="16666" 
y="0"></use></g><g transform="translate(15965,-5780)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="815" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1209" y="0"></use><g transform="translate(2266,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="686" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2299" x="3329" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="4330" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="4691" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="5221" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="5821" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="6398" y="0"></use><g transform="translate(6787,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="613" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="7576" y="0"></use></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-47">i_t = \sigma(W_{xi}x_t+W_{hi}h_{t-1}+W_{ci}c_{t-1}+b_i)\\
f_t = \sigma(W_{xf}x_t+W_{hf}h_{t-1}+W_{cf}c_{t-1}+b_f)\\
c_t = f_t\odot c_{t-1}+i_t\odot tanh(W_{xc}x_t+W_{hc}h_{t-1}+b_c)\\
o_t = \sigma(W_{xo}x_t+W_{ho}h_{t-1}+W_{co}c_{t}+b_o)\\
h_t = o_t\odot tanh(c_t)</script> <br>
其中,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-48-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 4512.976405595748 952.103370696049" style="width: 10.425ex; height: 2.201ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="488" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="701" y="0"></use><g transform="translate(1146,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="693" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1992" y="0"></use><g transform="translate(2437,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="613" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3226" y="0"></use><g transform="translate(3671,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="686" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-48">i_t, f_t, c_t, o_t</script>分别表示输入门,遗忘门,记忆单元及输出门的向量值,带角标的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-49-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: 
inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1048.5 747.103370696049" style="width: 2.432ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use></g></svg></span><script type="math/tex" id="MathJax-Element-49">W</script><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-50-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 429.5 747.103370696049" style="width: 1.042ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use></g></svg></span><script type="math/tex" id="MathJax-Element-50">b</script>为模型参数,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-51-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 2068 747.103370696049" style="width: 4.749ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="361" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="891" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="1491" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-51">tanh</script>为双曲正切函数,<span class="MathJax_Preview"></span><span class="MathJax_SVG" 
id="MathJax-Element-52-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -604.0516853480245 778.5 708.103370696049" style="width: 1.853ex; height: 1.622ex; vertical-align: -0.347ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2299"></use></g></svg></span><script type="math/tex" id="MathJax-Element-52">\odot</script>表示逐元素(elementwise)的乘法操作。输入门控制着新输入进入记忆单元<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-53-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 433.5 496.10337069604896" style="width: 1.042ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use></g></svg></span><script type="math/tex" id="MathJax-Element-53">c</script>的强度,遗忘门控制着记忆单元维持上一时刻值的强度,输出门控制着输出记忆单元的强度。三种门的计算方式类似,但有着完全不同的参数,它们各自以不同的方式控制着记忆单元<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-54-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 433.5 496.10337069604896" style="width: 1.042ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use></g></svg></span><script type="math/tex" id="MathJax-Element-54">c</script>,如图3所示:</p><p align="center" data-anchor-id="gztx">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/understand_sentiment/image/lstm.png" alt="时刻t的LSTM结构示意图" width="600"><br>
图3. 时刻<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-55-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-55">t</script>的LSTM [7]
</p><p data-anchor-id="jbb7">LSTM通过给简单的循环神经网络增加记忆及控制门的方式,增强了其处理远距离依赖问题的能力。类似原理的改进还有Gated Recurrent Unit (GRU)[<a href="#参考文献">8</a>],其设计更为简洁一些。<strong>这些改进虽然各有不同,但是它们的宏观描述却与简单的循环神经网络一样(如图2所示),即隐状态依据当前输入及前一时刻的隐状态来改变,不断地循环这一过程直至输入处理完毕:</strong></p><div class="md-section-divider"></div><p data-anchor-id="p2ku"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-56-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 10818.469099556627 1042.103370696049" style="width: 25.135ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="815" y="-213"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1209" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-52" x="2266" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="3025" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="3492" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75" x="3925" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="4497" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="4949" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="5401" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="5867" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="6468" y="0"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="6829" y="0"></use><g transform="translate(7219,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="8147" y="0"></use><g transform="translate(8592,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><g transform="translate(576,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="10428" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-56"> h_t=Recurrent(x_t,h_{t-1})</script></p><p data-anchor-id="dmbf">其中,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-57-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 4563.5 746.103370696049" style="width: 10.656ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-52"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="759" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="1226" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-75" 
x="1659" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="2231" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-72" x="2683" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="3135" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="3601" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="4202" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-57">Recurrent</script>可以表示简单的循环神经网络、GRU或LSTM。</p><div class="md-section-divider"></div><h3 data-anchor-id="ub28" id="栈式双向lstmstacked-bidirectional-lstm">栈式双向LSTM(Stacked Bidirectional LSTM)</h3><p data-anchor-id="af1z">对于正常顺序的循环神经网络,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-58-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 932.1191013989369 893.5886520702876" style="width: 2.201ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="815" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-58">h_t</script>包含了<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-59-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-59">t</script>时刻之前的输入信息,也就是上文信息。同样,为了得到下文信息,我们可以使用反方向(将输入逆序处理)的循环神经网络。结合构建深层循环神经网络的方法(深层神经网络往往能得到更抽象和高级的特征表示),我们可以通过构建更加强有力的基于LSTM的栈式双向循环神经网络[<a href="#参考文献">9</a>],来对时序数据进行建模。 </p><p data-anchor-id="ozsi">如图4所示(以三层为例),奇数层LSTM正向,偶数层LSTM反向,高一层的LSTM使用低一层LSTM及之前所有层的信息作为输入,对最高层LSTM序列使用时间维度上的最大池化即可得到文本的定长向量表示(这一表示充分融合了文本的上下文信息,并且对文本进行了深层次抽象),最后我们将文本表示连接至softmax构建分类模型。</p><p align="center" data-anchor-id="9fkt">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/understand_sentiment/image/stacked_lstm.jpg" alt="栈式双向LSTM用于文本分类的结构示意图" width="450"><br>
图4. 栈式双向LSTM用于文本分类
</p><div class="md-section-divider"></div><h2 data-anchor-id="9vcj" id="数据准备">数据准备</h2><div class="md-section-divider"></div><h3 data-anchor-id="e990" id="数据介绍与下载">数据介绍与下载</h3><p data-anchor-id="ffq3">我们以<a href="http://ai.stanford.edu/%7Eamaas/data/sentiment/" target="_blank">IMDB情感分析数据集</a>为例进行介绍。IMDB数据集的训练集和测试集分别包含25000个已标注过的电影评论。其中,负面评论的得分小于等于4,正面评论的得分大于等于7,满分10分。您可以使用下面的脚本下载 IMDB 数据集和<a href="http://www.statmt.org/moses/" target="_blank">Moses</a>工具:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="qzag"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pun">./</span><span class="pln">data</span><span class="pun">/</span><span class="pln">get_imdb</span><span class="pun">.</span><span class="pln">sh</span></code></li></ol></pre><p data-anchor-id="j28h">如果数据获取成功,您将在目录<code>data</code>中看到下面的文件:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="n1ex"><ol class="linenums"><li class="L0"><code><span class="pln">aclImdb get_imdb</span><span class="pun">.</span><span class="pln">sh imdb mosesdecoder</span><span class="pun">-</span><span class="pln">master</span></code></li></ol></pre><ul data-anchor-id="et3j">
<li>aclImdb: 从外部网站上下载的原始数据集。</li>
<li>imdb: 仅包含训练和测试数据集。</li>
<li>mosesdecoder-master: Moses 工具。</li>
</ul><div class="md-section-divider"></div><h3 data-anchor-id="8fo5" id="数据预处理">数据预处理</h3><p data-anchor-id="b7p7">我们使用的预处理脚本为<code>preprocess.py</code>。该脚本会调用Moses工具中的<code>tokenizer.perl</code>脚本来切分单词和标点符号,并会将训练集随机打乱排序再构建字典。注意:我们只使用已标注的训练集和测试集。执行下面的命令就可以预处理数据:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="hwxu"><ol class="linenums"><li class="L0"><code><span class="pln">data_dir</span><span class="pun">=</span><span class="str">"./data/imdb"</span></code></li><li class="L1"><code><span class="pln">python preprocess</span><span class="pun">.</span><span class="pln">py </span><span class="pun">-</span><span class="pln">i $data_dir</span></code></li></ol></pre><p data-anchor-id="6ow1">运行成功后目录<code>./data/pre-imdb</code> 结构如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="vc73"><ol class="linenums"><li class="L0"><code><span class="pln">dict</span><span class="pun">.</span><span class="pln">txt labels</span><span class="pun">.</span><span class="pln">list test</span><span class="pun">.</span><span class="pln">list test_part_000 train</span><span class="pun">.</span><span class="pln">list train_part_000</span></code></li></ol></pre><ul data-anchor-id="zwbh">
<li>test_part_000 和 train_part_000: 所有标记的测试集和训练集,训练集已经随机打乱。</li>
<li>train.list 和 test.list: 训练集和测试集文件列表。</li>
<li>dict.txt: 利用训练集生成的字典。</li>
<li>labels.list: 类别标签列表,标签0表示负面评论,标签1表示正面评论。</li>
</ul><div class="md-section-divider"></div><h3 data-anchor-id="86w9" id="提供数据给paddlepaddle">提供数据给PaddlePaddle</h3><p data-anchor-id="c84a">PaddlePaddle可以读取Python写的传输数据脚本,下面<code>dataprovider.py</code>文件给出了完整例子,主要包括两部分:</p><ul data-anchor-id="dnuq">
<li>hook: 定义文本信息、类别ID的数据类型。文本被定义为整数序列<code>integer_value_sequence</code>,类别被定义为整数<code>integer_value</code>。</li>
<li>process: 按行读取以<code>'\t\t'</code>分隔的类别ID和文本信息,并用yield关键字返回。</li>
</ul><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="b4ki"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer</span><span class="pun">.</span><span class="typ">PyDataProvider2</span><span class="pln"> </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="kwd">def</span><span class="pln"> hook</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> dictionary</span><span class="pun">,</span><span class="pln"> </span><span class="pun">**</span><span class="pln">kwargs</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">word_dict </span><span class="pun">=</span><span class="pln"> dictionary</span></code></li><li class="L4"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">input_types </span><span class="pun">=</span><span class="pln"> </span><span class="pun">{</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="str">'word'</span><span class="pun">:</span><span class="pln"> integer_value_sequence</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">)),</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="str">'label'</span><span class="pun">:</span><span class="pln"> integer_value</span><span class="pun">(</span><span class="lit">2</span><span 
class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="pun">}</span></code></li><li class="L8"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">logger</span><span class="pun">.</span><span class="pln">info</span><span class="pun">(</span><span class="str">'dict len : %d'</span><span class="pln"> </span><span class="pun">%</span><span class="pln"> </span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">)))</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="lit">@provider</span><span class="pun">(</span><span class="pln">init_hook</span><span class="pun">=</span><span class="pln">hook</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="kwd">def</span><span class="pln"> process</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> file_name</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">file_name</span><span class="pun">,</span><span class="pln"> </span><span class="str">'r'</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> fdata</span><span class="pun">:</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> line_count</span><span class="pun">,</span><span class="pln"> line </span><span class="kwd">in</span><span class="pln"> enumerate</span><span 
class="pun">(</span><span class="pln">fdata</span><span class="pun">):</span></code></li><li class="L5"><code class="language-python"><span class="pln"> label</span><span class="pun">,</span><span class="pln"> comment </span><span class="pun">=</span><span class="pln"> line</span><span class="pun">.</span><span class="pln">strip</span><span class="pun">().</span><span class="pln">split</span><span class="pun">(</span><span class="str">'\t\t'</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"><span class="pln"> label </span><span class="pun">=</span><span class="pln"> int</span><span class="pun">(</span><span class="pln">label</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln"> words </span><span class="pun">=</span><span class="pln"> comment</span><span class="pun">.</span><span class="pln">split</span><span class="pun">()</span></code></li><li class="L8"><code class="language-python"><span class="pln"> word_slot </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span></code></li><li class="L9"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">word_dict</span><span class="pun">[</span><span class="pln">w</span><span class="pun">]</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> w </span><span class="kwd">in</span><span class="pln"> words </span><span class="kwd">if</span><span class="pln"> w </span><span class="kwd">in</span><span class="pln"> settings</span><span class="pun">.</span><span class="pln">word_dict</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="pun">]</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="kwd">yield</span><span class="pln"> </span><span class="pun">{</span></code></li><li class="L2"><code class="language-python"><span 
class="pln"> </span><span class="str">'word'</span><span class="pun">:</span><span class="pln"> word_slot</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="str">'label'</span><span class="pun">:</span><span class="pln"> label</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="pun">}</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="1cwk" id="模型配置说明">模型配置说明</h2><p data-anchor-id="cd1s"><code>trainer_config.py</code> 是一个配置文件的例子。</p><div class="md-section-divider"></div><h3 data-anchor-id="hhxy" id="数据定义">数据定义</h3><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="e3a9"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">from</span><span class="pln"> os</span><span class="pun">.</span><span class="pln">path </span><span class="kwd">import</span><span class="pln"> join </span><span class="kwd">as</span><span class="pln"> join_path</span></code></li><li class="L1"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer_config_helpers </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L2"><code class="language-python"><span class="com"># 是否是测试模式</span></code></li><li class="L3"><code class="language-python"><span class="pln">is_test </span><span class="pun">=</span><span class="pln"> get_config_arg</span><span class="pun">(</span><span class="str">'is_test'</span><span class="pun">,</span><span class="pln"> bool</span><span class="pun">,</span><span class="pln"> </span><span class="kwd">False</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="com"># 是否是预测模式</span></code></li><li class="L5"><code class="language-python"><span 
class="pln">is_predict </span><span class="pun">=</span><span class="pln"> get_config_arg</span><span class="pun">(</span><span class="str">'is_predict'</span><span class="pun">,</span><span class="pln"> bool</span><span class="pun">,</span><span class="pln"> </span><span class="kwd">False</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="com"># 数据路径</span></code></li><li class="L8"><code class="language-python"><span class="pln">data_dir </span><span class="pun">=</span><span class="pln"> </span><span class="str">"./data/pre-imdb"</span></code></li><li class="L9"><code class="language-python"><span class="com"># 文件名</span></code></li><li class="L0"><code class="language-python"><span class="pln">train_list </span><span class="pun">=</span><span class="pln"> </span><span class="str">"train.list"</span></code></li><li class="L1"><code class="language-python"><span class="pln">test_list </span><span class="pun">=</span><span class="pln"> </span><span class="str">"test.list"</span></code></li><li class="L2"><code class="language-python"><span class="pln">dict_file </span><span class="pun">=</span><span class="pln"> </span><span class="str">"dict.txt"</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="com"># 字典大小</span></code></li><li class="L5"><code class="language-python"><span class="pln">dict_dim </span><span class="pun">=</span><span class="pln"> len</span><span class="pun">(</span><span class="pln">open</span><span class="pun">(</span><span class="pln">join_path</span><span class="pun">(</span><span class="pln">data_dir</span><span class="pun">,</span><span class="pln"> </span><span class="str">"dict.txt"</span><span class="pun">)).</span><span class="pln">readlines</span><span class="pun">())</span></code></li><li class="L6"><code class="language-python"><span class="com"># 
类别个数</span></code></li><li class="L7"><code class="language-python"><span class="pln">class_dim </span><span class="pun">=</span><span class="pln"> len</span><span class="pun">(</span><span class="pln">open</span><span class="pun">(</span><span class="pln">join_path</span><span class="pun">(</span><span class="pln">data_dir</span><span class="pun">,</span><span class="pln"> </span><span class="str">'labels.list'</span><span class="pun">)).</span><span class="pln">readlines</span><span class="pun">())</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_predict</span><span class="pun">:</span></code></li><li class="L0"><code class="language-python"><span class="pln"> train_list </span><span class="pun">=</span><span class="pln"> join_path</span><span class="pun">(</span><span class="pln">data_dir</span><span class="pun">,</span><span class="pln"> train_list</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> test_list </span><span class="pun">=</span><span class="pln"> join_path</span><span class="pun">(</span><span class="pln">data_dir</span><span class="pun">,</span><span class="pln"> test_list</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="pln"> dict_file </span><span class="pun">=</span><span class="pln"> join_path</span><span class="pun">(</span><span class="pln">data_dir</span><span class="pun">,</span><span class="pln"> dict_file</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> train_list </span><span class="pun">=</span><span class="pln"> train_list </span><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_test </span><span class="kwd">else</span><span class="pln"> 
</span><span class="kwd">None</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="com"># 构造字典</span></code></li><li class="L5"><code class="language-python"><span class="pln"> word_dict </span><span class="pun">=</span><span class="pln"> dict</span><span class="pun">()</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">dict_file</span><span class="pun">,</span><span class="pln"> </span><span class="str">'r'</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> f</span><span class="pun">:</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> i</span><span class="pun">,</span><span class="pln"> line </span><span class="kwd">in</span><span class="pln"> enumerate</span><span class="pun">(</span><span class="pln">open</span><span class="pun">(</span><span class="pln">dict_file</span><span class="pun">,</span><span class="pln"> </span><span class="str">'r'</span><span class="pun">)):</span></code></li><li class="L8"><code class="language-python"><span class="pln"> word_dict</span><span class="pun">[</span><span class="pln">line</span><span class="pun">.</span><span class="pln">split</span><span class="pun">(</span><span class="str">'\t'</span><span class="pun">)[</span><span class="lit">0</span><span class="pun">]]</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> i</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="com"># 通过define_py_data_sources2函数从dataprovider.py中读取数据</span></code></li><li class="L0"><code class="language-python"><span class="pln"> define_py_data_sources2</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span 
class="pln"> train_list</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> test_list</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> module</span><span class="pun">=</span><span class="str">"dataprovider"</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> obj</span><span class="pun">=</span><span class="str">"process"</span><span class="pun">,</span><span class="pln"> </span><span class="com"># 指定生成数据的函数。</span></code></li><li class="L5"><code class="language-python"><span class="pln"> args</span><span class="pun">={</span><span class="str">'dictionary'</span><span class="pun">:</span><span class="pln"> word_dict</span><span class="pun">})</span><span class="pln"> </span><span class="com"># 额外的参数,这里指定词典。</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="9w3s" id="算法配置">算法配置</h3><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="ddij"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">settings</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> batch_size</span><span class="pun">=</span><span class="lit">128</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> learning_rate</span><span class="pun">=</span><span class="lit">2e-3</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> learning_method</span><span class="pun">=</span><span class="typ">AdamOptimizer</span><span class="pun">(),</span></code></li><li class="L4"><code class="language-python"><span class="pln"> regularization</span><span class="pun">=</span><span class="pln">L2Regularization</span><span class="pun">(</span><span class="lit">8e-4</span><span 
class="pun">),</span></code></li><li class="L5"><code class="language-python"><span class="pln"> gradient_clipping_threshold</span><span class="pun">=</span><span class="lit">25</span><span class="pun">)</span></code></li></ol></pre><ul data-anchor-id="7rlj">
<li>设置batch size大小为128。</li>
<li>设置全局学习率为2e-3。</li>
<li>使用Adam优化算法。</li>
<li>设置L2正则化系数为8e-4。</li>
<li>设置梯度截断(clipping)阈值为25。</li>
</ul><div class="md-section-divider"></div><h3 data-anchor-id="dnjq" id="模型结构">模型结构</h3><p data-anchor-id="y1sz">我们用PaddlePaddle实现了两种文本分类算法,分别基于上文所述的<a href="#文本卷积神经网络(CNN)">文本卷积神经网络</a>和<a href="#栈式双向LSTM(Stacked%20Bidirectional%20LSTM)">栈式双向LSTM</a>。</p><div class="md-section-divider"></div><h4 data-anchor-id="kpk4" id="文本卷积神经网络的实现">文本卷积神经网络的实现</h4><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="j6wp"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> convolution_net</span><span class="pun">(</span><span class="pln">input_dim</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln"> class_dim</span><span class="pun">=</span><span class="lit">2</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> emb_dim</span><span class="pun">=</span><span class="lit">128</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> hid_dim</span><span class="pun">=</span><span class="lit">128</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> is_predict</span><span class="pun">=</span><span class="kwd">False</span><span class="pun">):</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="com"># 网络输入:id表示的词序列,词典大小为input_dim</span></code></li><li class="L6"><code class="language-python"><span class="pln"> data </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">"word"</span><span class="pun">,</span><span class="pln"> input_dim</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="com"># 将id表示的词序列映射为embedding序列</span></code></li><li class="L8"><code class="language-python"><span class="pln"> emb 
</span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">data</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">emb_dim</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="com"># 卷积及最大化池操作,卷积核窗口大小为3</span></code></li><li class="L0"><code class="language-python"><span class="pln"> conv_3 </span><span class="pun">=</span><span class="pln"> sequence_conv_pool</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">emb</span><span class="pun">,</span><span class="pln"> context_len</span><span class="pun">=</span><span class="lit">3</span><span class="pun">,</span><span class="pln"> hidden_size</span><span class="pun">=</span><span class="pln">hid_dim</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 卷积及最大化池操作,卷积核窗口大小为4</span></code></li><li class="L2"><code class="language-python"><span class="pln"> conv_4 </span><span class="pun">=</span><span class="pln"> sequence_conv_pool</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">emb</span><span class="pun">,</span><span class="pln"> context_len</span><span class="pun">=</span><span class="lit">4</span><span class="pun">,</span><span class="pln"> hidden_size</span><span class="pun">=</span><span class="pln">hid_dim</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="com"># 将conv_3和conv_4拼接起来输入给softmax分类,类别数为class_dim</span></code></li><li class="L4"><code class="language-python"><span class="pln"> output </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li 
class="L5"><code class="language-python"><span class="pln"> input</span><span class="pun">=[</span><span class="pln">conv_3</span><span class="pun">,</span><span class="pln"> conv_4</span><span class="pun">],</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">class_dim</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="typ">SoftmaxActivation</span><span class="pun">())</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="kwd">if</span><span class="pln"> </span><span class="kwd">not</span><span class="pln"> is_predict</span><span class="pun">:</span></code></li><li class="L8"><code class="language-python"><span class="pln"> lbl </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">"label"</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)</span><span class="pln"> </span><span class="com">#网络输入:类别标签</span></code></li><li class="L9"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">classification_cost</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">output</span><span class="pun">,</span><span class="pln"> label</span><span class="pun">=</span><span class="pln">lbl</span><span class="pun">))</span></code></li><li class="L0"><code class="language-python"><span class="pln"> </span><span class="kwd">else</span><span class="pun">:</span></code></li><li class="L1"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">output</span><span class="pun">)</span></code></li></ol></pre><p data-anchor-id="f5l6">其中,我们仅用一个<a 
href="https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py" target="_blank"><code>sequence_conv_pool</code></a>方法就实现了卷积和池化操作,卷积核的数量为hidden_size参数。</p><div class="md-section-divider"></div><h4 data-anchor-id="pjm2" id="栈式双向lstm的实现">栈式双向LSTM的实现</h4><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="99ut"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> stacked_lstm_net</span><span class="pun">(</span><span class="pln">input_dim</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln"> class_dim</span><span class="pun">=</span><span class="lit">2</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> emb_dim</span><span class="pun">=</span><span class="lit">128</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> hid_dim</span><span class="pun">=</span><span class="lit">512</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> stacked_num</span><span class="pun">=</span><span class="lit">3</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> is_predict</span><span class="pun">=</span><span class="kwd">False</span><span class="pun">):</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="com"># LSTM的层数stacked_num为奇数,确保最高层LSTM正向</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="kwd">assert</span><span class="pln"> stacked_num </span><span class="pun">%</span><span class="pln"> </span><span class="lit">2</span><span class="pln"> </span><span class="pun">==</span><span class="pln"> </span><span 
class="lit">1</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="com"># 设置神经网络层的属性</span></code></li><li class="L0"><code class="language-python"><span class="pln"> layer_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ExtraLayerAttribute</span><span class="pun">(</span><span class="pln">drop_rate</span><span class="pun">=</span><span class="lit">0.5</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 设置参数的属性</span></code></li><li class="L2"><code class="language-python"><span class="pln"> fc_para_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParameterAttribute</span><span class="pun">(</span><span class="pln">learning_rate</span><span class="pun">=</span><span class="lit">1e-3</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> lstm_para_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParameterAttribute</span><span class="pun">(</span><span class="pln">initial_std</span><span class="pun">=</span><span class="lit">0.</span><span class="pun">,</span><span class="pln"> learning_rate</span><span class="pun">=</span><span class="lit">1.</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="pln"> para_attr </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">fc_para_attr</span><span class="pun">,</span><span class="pln"> lstm_para_attr</span><span class="pun">]</span></code></li><li class="L5"><code class="language-python"><span class="pln"> bias_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParameterAttribute</span><span class="pun">(</span><span class="pln">initial_std</span><span class="pun">=</span><span class="lit">0.</span><span 
class="pun">,</span><span class="pln"> l2_rate</span><span class="pun">=</span><span class="lit">0.</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="com"># 激活函数</span></code></li><li class="L7"><code class="language-python"><span class="pln"> relu </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ReluActivation</span><span class="pun">()</span></code></li><li class="L8"><code class="language-python"><span class="pln"> linear </span><span class="pun">=</span><span class="pln"> </span><span class="typ">LinearActivation</span><span class="pun">()</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 网络输入:id表示的词序列,词典大小为input_dim</span></code></li><li class="L2"><code class="language-python"><span class="pln"> data </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="str">"word"</span><span class="pun">,</span><span class="pln"> input_dim</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="com"># 将id表示的词序列映射为embedding序列</span></code></li><li class="L4"><code class="language-python"><span class="pln"> emb </span><span class="pun">=</span><span class="pln"> embedding_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">data</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">emb_dim</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln"> fc1 </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span><span 
class="pln">input</span><span class="pun">=</span><span class="pln">emb</span><span class="pun">,</span><span class="pln"> size</span><span class="pun">=</span><span class="pln">hid_dim</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="pln">linear</span><span class="pun">,</span><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">bias_attr</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="com"># 基于LSTM的循环神经网络</span></code></li><li class="L8"><code class="language-python"><span class="pln"> lstm1 </span><span class="pun">=</span><span class="pln"> lstmemory</span><span class="pun">(</span></code></li><li class="L9"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">fc1</span><span class="pun">,</span><span class="pln"> act</span><span class="pun">=</span><span class="pln">relu</span><span class="pun">,</span><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">bias_attr</span><span class="pun">,</span><span class="pln"> layer_attr</span><span class="pun">=</span><span class="pln">layer_attr</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 由fc_layer和lstmemory构建深度为stacked_num的栈式双向LSTM</span></code></li><li class="L2"><code class="language-python"><span class="pln"> inputs </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">fc1</span><span class="pun">,</span><span class="pln"> lstm1</span><span class="pun">]</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> i </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span 
class="lit">2</span><span class="pun">,</span><span class="pln"> stacked_num </span><span class="pun">+</span><span class="pln"> </span><span class="lit">1</span><span class="pun">):</span></code></li><li class="L4"><code class="language-python"><span class="pln"> fc </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li class="L5"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">inputs</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">hid_dim</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> act</span><span class="pun">=</span><span class="pln">linear</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">para_attr</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">bias_attr</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln"> lstm </span><span class="pun">=</span><span class="pln"> lstmemory</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> input</span><span class="pun">=</span><span class="pln">fc</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="com"># 奇数层正向,偶数层反向。</span></code></li><li class="L3"><code class="language-python"><span class="pln"> reverse</span><span class="pun">=(</span><span class="pln">i </span><span class="pun">%</span><span class="pln"> </span><span class="lit">2</span><span class="pun">)</span><span class="pln"> </span><span class="pun">==</span><span 
class="pln"> </span><span class="lit">0</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> act</span><span class="pun">=</span><span class="pln">relu</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">bias_attr</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> layer_attr</span><span class="pun">=</span><span class="pln">layer_attr</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln"> inputs </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">fc</span><span class="pun">,</span><span class="pln"> lstm</span><span class="pun">]</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="com"># 对最后一层fc_layer使用时间维度上的最大池化得到定长向量</span></code></li><li class="L0"><code class="language-python"><span class="pln"> fc_last </span><span class="pun">=</span><span class="pln"> pooling_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">inputs</span><span class="pun">[</span><span class="lit">0</span><span class="pun">],</span><span class="pln"> pooling_type</span><span class="pun">=</span><span class="typ">MaxPooling</span><span class="pun">())</span></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="com"># 对最后一层lstmemory使用时间维度上的最大池化得到定长向量</span></code></li><li class="L2"><code class="language-python"><span class="pln"> lstm_last </span><span class="pun">=</span><span class="pln"> pooling_layer</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">inputs</span><span class="pun">[</span><span 
class="lit">1</span><span class="pun">],</span><span class="pln"> pooling_type</span><span class="pun">=</span><span class="typ">MaxPooling</span><span class="pun">())</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="com"># 将fc_last和lstm_last拼接起来输入给softmax分类,类别数为class_dim</span></code></li><li class="L4"><code class="language-python"><span class="pln"> output </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li class="L5"><code class="language-python"><span class="pln"> input</span><span class="pun">=[</span><span class="pln">fc_last</span><span class="pun">,</span><span class="pln"> lstm_last</span><span class="pun">],</span></code></li><li class="L6"><code class="language-python"><span class="pln"> size</span><span class="pun">=</span><span class="pln">class_dim</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln"> act</span><span class="pun">=</span><span class="typ">SoftmaxActivation</span><span class="pun">(),</span></code></li><li class="L8"><code class="language-python"><span class="pln"> bias_attr</span><span class="pun">=</span><span class="pln">bias_attr</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="pln">para_attr</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln"> </span><span class="kwd">if</span><span class="pln"> is_predict</span><span class="pun">:</span></code></li><li class="L2"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">output</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="kwd">else</span><span 
class="pun">:</span></code></li><li class="L4"><code class="language-python"><span class="pln"> outputs</span><span class="pun">(</span><span class="pln">classification_cost</span><span class="pun">(</span><span class="pln">input</span><span class="pun">=</span><span class="pln">output</span><span class="pun">,</span><span class="pln"> label</span><span class="pun">=</span><span class="pln">data_layer</span><span class="pun">(</span><span class="str">'label'</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)))</span></code></li></ol></pre><p data-anchor-id="z02x">我们的模型配置<code>trainer_config.py</code>默认使用<code>stacked_lstm_net</code>网络,如果要使用<code>convolution_net</code>,注释相应的行即可。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="cw59"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">stacked_lstm_net</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> dict_dim</span><span class="pun">,</span><span class="pln"> class_dim</span><span class="pun">=</span><span class="pln">class_dim</span><span class="pun">,</span><span class="pln"> stacked_num</span><span class="pun">=</span><span class="lit">3</span><span class="pun">,</span><span class="pln"> is_predict</span><span class="pun">=</span><span class="pln">is_predict</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="com"># convolution_net(dict_dim, class_dim=class_dim, is_predict=is_predict)</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="3lt3" id="训练模型">训练模型</h2><p data-anchor-id="egn3">使用<code>train.sh</code>脚本可以开启本地的训练:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="2rfu"><ol class="linenums"><li class="L0"><code><span class="pun">./</span><span class="pln">train</span><span 
class="pun">.</span><span class="pln">sh</span></code></li></ol></pre><p data-anchor-id="76gw">train.sh内容如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="3469"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">paddle train </span><span class="pun">--</span><span class="pln">config</span><span class="pun">=</span><span class="pln">trainer_config</span><span class="pun">.</span><span class="pln">py \</span></code></li><li class="L1"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">save_dir</span><span class="pun">=./</span><span class="pln">model_output \</span></code></li><li class="L2"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">job</span><span class="pun">=</span><span class="pln">train \</span></code></li><li class="L3"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">use_gpu</span><span class="pun">=</span><span class="pln">false \</span></code></li><li class="L4"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">trainer_count</span><span class="pun">=</span><span class="lit">4</span><span class="pln"> \</span></code></li><li class="L5"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">num_passes</span><span class="pun">=</span><span class="lit">10</span><span class="pln"> \</span></code></li><li class="L6"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">log_period</span><span class="pun">=</span><span class="lit">20</span><span class="pln"> \</span></code></li><li class="L7"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">dot_period</span><span class="pun">=</span><span class="lit">20</span><span class="pln"> 
\</span></code></li><li class="L8"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">show_parameter_stats_period</span><span class="pun">=</span><span class="lit">100</span><span class="pln"> \</span></code></li><li class="L9"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">test_all_data_in_one_period</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \</span></code></li><li class="L0"><code class="language-bash"><span class="pln"> </span><span class="lit">2</span><span class="pun">&gt;&amp;</span><span class="lit">1</span><span class="pln"> </span><span class="pun">|</span><span class="pln"> tee </span><span class="str">'train.log'</span></code></li></ol></pre><ul data-anchor-id="nl61">
<li>--config=trainer_config.py: 设置模型配置。</li>
<li>--save_dir=./model_output: 设置输出路径以保存训练完成的模型。</li>
<li>--job=train: 设置工作模式为训练。</li>
<li>--use_gpu=false: 使用CPU训练,如果您安装GPU版本的PaddlePaddle,并想使用GPU来训练可将此设置为true。</li>
<li>--trainer_count=4: 设置线程数(或GPU个数)。</li>
<li>--num_passes=10: 设置pass数,PaddlePaddle中的一个pass意味着对数据集中的所有样本进行一次训练。</li>
<li>--log_period=20: 每20个batch打印一次日志。</li>
<li>--dot_period=20: 每20个batch打印一个“.”。</li>
<li>--show_parameter_stats_period=100: 每100个batch打印一次统计信息。</li>
<li>--test_all_data_in_one_period=1: 每次测试都测试所有数据。</li>
</ul><p data-anchor-id="abqo">如果运行成功,输出日志保存在 <code>train.log</code>中,模型保存在目录<code>model_output/</code>中。 输出日志说明如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="n33c"><ol class="linenums"><li class="L0"><code><span class="typ">Batch</span><span class="pun">=</span><span class="lit">20</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">2560</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.681644</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">0.681644</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.36875</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.36875</span></code></li><li class="L1"><code><span class="pun">...</span></code></li><li class="L2"><code><span class="typ">Pass</span><span class="pun">=</span><span class="lit">0</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">196</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">25000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.418964</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.1922</span></code></li><li class="L3"><code><span class="typ">Test</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">24999</span><span class="pln"> cost</span><span class="pun">=</span><span 
class="lit">0.39297</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.149406</span></code></li></ol></pre><ul data-anchor-id="vl9j">
<li>Batch=xx: 表示训练了xx个Batch。</li>
<li>samples=xx: 表示训练了xx个样本。</li>
<li>AvgCost=xx: 从第0个batch到当前batch的平均损失。</li>
<li>CurrentCost=xx: 最新log_period个batch的损失。</li>
<li>Eval: classification_error_evaluator=xx: 表示第0个batch到当前batch的分类错误。</li>
<li>CurrentEval: classification_error_evaluator=xx: 最新log_period个batch的分类错误。</li>
<li>Pass=0: 通过所有训练集一次称为一个Pass。 0表示第一次经过训练集。</li>
</ul><div class="md-section-divider"></div><h2 data-anchor-id="1m2z" id="应用模型">应用模型</h2><div class="md-section-divider"></div><h3 data-anchor-id="948j" id="测试">测试</h3><p data-anchor-id="bhzg">测试是指使用训练出的模型评估已标记的数据集。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="8ejm"><ol class="linenums"><li class="L0"><code><span class="pun">./</span><span class="pln">test</span><span class="pun">.</span><span class="pln">sh</span></code></li></ol></pre><p data-anchor-id="lvuo">测试脚本<code>test.sh</code>的内容如下,其中函数<code>get_best_pass</code>通过对分类错误率进行排序来获得最佳模型:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="5c83"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="kwd">function</span><span class="pln"> get_best_pass</span><span class="pun">()</span><span class="pln"> </span><span class="pun">{</span></code></li><li class="L1"><code class="language-bash"><span class="pln"> cat $1 </span><span class="pun">|</span><span class="pln"> grep </span><span class="pun">-</span><span class="typ">Pzo</span><span class="pln"> </span><span class="str">'Test .*\n.*pass-.*'</span><span class="pln"> </span><span class="pun">|</span><span class="pln"> \</span></code></li><li class="L2"><code class="language-bash"><span class="pln"> sed </span><span class="pun">-</span><span class="pln">r </span><span class="str">'N;s/Test.* error=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g'</span><span class="pln"> </span><span class="pun">|</span><span class="pln"> \</span></code></li><li class="L3"><code class="language-bash"><span class="pln"> sort </span><span class="pun">|</span><span class="pln"> head </span><span class="pun">-</span><span class="pln">n </span><span class="lit">1</span></code></li><li class="L4"><code class="language-bash"><span class="pun">}</span></code></li><li class="L5"><code class="language-bash"></code></li><li class="L6"><code 
class="language-bash"><span class="pln">log</span><span class="pun">=</span><span class="pln">train</span><span class="pun">.</span><span class="pln">log</span></code></li><li class="L7"><code class="language-bash"><span class="pln">LOG</span><span class="pun">=</span><span class="str">`get_best_pass $log`</span></code></li><li class="L8"><code class="language-bash"><span class="pln">LOG</span><span class="pun">=(</span><span class="pln">$</span><span class="pun">{</span><span class="pln">LOG</span><span class="pun">})</span></code></li><li class="L9"><code class="language-bash"><span class="pln">evaluate_pass</span><span class="pun">=</span><span class="str">"model_output/pass-${LOG[1]}"</span></code></li><li class="L0"><code class="language-bash"></code></li><li class="L1"><code class="language-bash"><span class="pln">echo </span><span class="str">'evaluating from pass '</span><span class="pln">$evaluate_pass</span></code></li><li class="L2"><code class="language-bash"></code></li><li class="L3"><code class="language-bash"><span class="pln">model_list</span><span class="pun">=./</span><span class="pln">model</span><span class="pun">.</span><span class="pln">list</span></code></li><li class="L4"><code class="language-bash"><span class="pln">touch $model_list </span><span class="pun">|</span><span class="pln"> echo $evaluate_pass </span><span class="pun">&gt;</span><span class="pln"> $model_list</span></code></li><li class="L5"><code class="language-bash"><span class="pln">net_conf</span><span class="pun">=</span><span class="pln">trainer_config</span><span class="pun">.</span><span class="pln">py</span></code></li><li class="L6"><code class="language-bash"><span class="pln">paddle train </span><span class="pun">--</span><span class="pln">config</span><span class="pun">=</span><span class="pln">$net_conf \</span></code></li><li class="L7"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">model_list</span><span 
class="pun">=</span><span class="pln">$model_list \</span></code></li><li class="L8"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">job</span><span class="pun">=</span><span class="pln">test \</span></code></li><li class="L9"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">use_gpu</span><span class="pun">=</span><span class="pln">false \</span></code></li><li class="L0"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">trainer_count</span><span class="pun">=</span><span class="lit">4</span><span class="pln"> \</span></code></li><li class="L1"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">config_args</span><span class="pun">=</span><span class="pln">is_test</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \</span></code></li><li class="L2"><code class="language-bash"><span class="pln"> </span><span class="lit">2</span><span class="pun">&gt;&amp;</span><span class="lit">1</span><span class="pln"> </span><span class="pun">|</span><span class="pln"> tee </span><span class="str">'test.log'</span></code></li></ol></pre><p data-anchor-id="wfxv">与训练不同,测试时需要指定<code>--job=test</code>和模型路径<code>--model_list=$model_list</code>。如果测试成功,日志将保存在<code>test.log</code>中。 在我们的测试中,最好的模型是<code>model_output/pass-00002</code>,分类错误率是0.115645:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="z1fw"><ol class="linenums"><li class="L0"><code><span class="typ">Pass</span><span class="pun">=</span><span class="lit">0</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">24999</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">0.280471</span><span class="pln"> </span><span class="typ">Eval</span><span 
class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.115645</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="wixb" id="预测">预测</h3><p data-anchor-id="vaqf"><code>predict.py</code>脚本提供了一个预测接口。预测IMDB中未标记评论的示例如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="rgnx"><ol class="linenums"><li class="L0"><code><span class="pun">./</span><span class="pln">predict</span><span class="pun">.</span><span class="pln">sh</span></code></li></ol></pre><p data-anchor-id="9d46">predict.sh的内容如下(注意应该确保默认模型路径<code>model_output/pass-00002</code>存在或更改为其它模型路径):</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="76le"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">model</span><span class="pun">=</span><span class="pln">model_output</span><span class="pun">/</span><span class="pln">pass</span><span class="pun">-</span><span class="lit">00002</span><span class="pun">/</span></code></li><li class="L1"><code class="language-bash"><span class="pln">config</span><span class="pun">=</span><span class="pln">trainer_config</span><span class="pun">.</span><span class="pln">py</span></code></li><li class="L2"><code class="language-bash"><span class="pln">label</span><span class="pun">=</span><span class="pln">data</span><span class="pun">/</span><span class="pln">pre</span><span class="pun">-</span><span class="pln">imdb</span><span class="pun">/</span><span class="pln">labels</span><span class="pun">.</span><span class="pln">list</span></code></li><li class="L3"><code class="language-bash"><span class="pln">cat </span><span class="pun">./</span><span class="pln">data</span><span class="pun">/</span><span class="pln">aclImdb</span><span class="pun">/</span><span class="pln">test</span><span class="pun">/</span><span class="pln">pos</span><span 
class="pun">/</span><span class="lit">10007</span><span class="pln">_10</span><span class="pun">.</span><span class="pln">txt </span><span class="pun">|</span><span class="pln"> python predict</span><span class="pun">.</span><span class="pln">py \</span></code></li><li class="L4"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">tconf</span><span class="pun">=</span><span class="pln">$config \</span></code></li><li class="L5"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">model</span><span class="pun">=</span><span class="pln">$model \</span></code></li><li class="L6"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">label</span><span class="pun">=</span><span class="pln">$label \</span></code></li><li class="L7"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">dict</span><span class="pun">=./</span><span class="pln">data</span><span class="pun">/</span><span class="pln">pre</span><span class="pun">-</span><span class="pln">imdb</span><span class="pun">/</span><span class="pln">dict</span><span class="pun">.</span><span class="pln">txt \</span></code></li><li class="L8"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">batch_size</span><span class="pun">=</span><span class="lit">1</span></code></li></ol></pre><ul data-anchor-id="ho4q">
<li><code>cat ./data/aclImdb/test/pos/10007_10.txt</code> : 输入预测样本。</li>
<li><code>predict.py</code> : 预测接口脚本。</li>
<li><code>--tconf=$config</code> : 设置网络配置。</li>
<li><code>--model=$model</code> : 设置模型路径。</li>
<li><code>--label=$label</code> : 设置标签类别字典,这个字典是整数标签和字符串标签的一个对应。</li>
<li><code>--dict=./data/pre-imdb/dict.txt</code> : 设置文本数据字典文件。</li>
<li><code>--batch_size=1</code> : 预测时的batch size大小。</li>
</ul><p data-anchor-id="31ln">本示例的预测结果:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="m6m2"><ol class="linenums"><li class="L0"><code><span class="typ">Loading</span><span class="pln"> parameters </span><span class="kwd">from</span><span class="pln"> model_output</span><span class="pun">/</span><span class="kwd">pass</span><span class="pun">-</span><span class="lit">00002</span><span class="pun">/</span></code></li><li class="L1"><code><span class="pln">predicting label </span><span class="kwd">is</span><span class="pln"> pos</span></code></li></ol></pre><p data-anchor-id="681q"><code>10007_10.txt</code>在路径<code>./data/aclImdb/test/pos</code>下面,而这里预测的标签也是pos,说明预测正确。</p><div class="md-section-divider"></div><h2 data-anchor-id="3bzc" id="总结">总结</h2><p data-anchor-id="vu3c">本章我们以情感分析为例,介绍了使用深度学习的方法进行端对端的短文本分类,并且使用PaddlePaddle完成了全部相关实验。同时,我们简要介绍了两种文本处理模型:卷积神经网络和循环神经网络。在后续的章节中我们会看到这两种基本的深度学习模型在其它任务上的应用。</p><div class="md-section-divider"></div><h2 data-anchor-id="pfob" id="参考文献">参考文献</h2><ol data-anchor-id="10o8">
<li>Kim Y. <a href="http://arxiv.org/pdf/1408.5882" target="_blank">Convolutional neural networks for sentence classification</a>[J]. arXiv preprint arXiv:1408.5882, 2014.</li>
<li>Kalchbrenner N, Grefenstette E, Blunsom P. <a href="http://arxiv.org/pdf/1404.2188.pdf?utm_medium=App.net&amp;utm_source=PourOver" target="_blank">A convolutional neural network for modelling sentences</a>[J]. arXiv preprint arXiv:1404.2188, 2014.</li>
<li>Dauphin Y N, Fan A, Auli M, et al. <a href="https://arxiv.org/pdf/1612.08083v1.pdf" target="_blank">Language Modeling with Gated Convolutional Networks</a>[J]. arXiv preprint arXiv:1612.08083, 2016.</li>
<li>Siegelmann H T, Sontag E D. <a href="http://research.cs.queensu.ca/home/akl/cisc879/papers/SELECTED_PAPERS_FROM_VARIOUS_SOURCES/05070215382317071.pdf" target="_blank">On the computational power of neural nets</a>[C]//Proceedings of the fifth annual workshop on Computational learning theory. ACM, 1992: 440-449.</li>
<li>Hochreiter S, Schmidhuber J. <a href="http://web.eecs.utk.edu/~itamar/courses/ECE-692/Bobby_paper1.pdf" target="_blank">Long short-term memory</a>[J]. Neural computation, 1997, 9(8): 1735-1780.</li>
<li>Bengio Y, Simard P, Frasconi P. <a href="http://www-dsi.ing.unifi.it/~paolo/ps/tnn-94-gradient.pdf" target="_blank">Learning long-term dependencies with gradient descent is difficult</a>[J]. IEEE transactions on neural networks, 1994, 5(2): 157-166.</li>
<li>Graves A. <a href="http://arxiv.org/pdf/1308.0850" target="_blank">Generating sequences with recurrent neural networks</a>[J]. arXiv preprint arXiv:1308.0850, 2013.</li>
<li>Cho K, Van Merriënboer B, Gulcehre C, et al. <a href="http://arxiv.org/pdf/1406.1078" target="_blank">Learning phrase representations using RNN encoder-decoder for statistical machine translation</a>[J]. arXiv preprint arXiv:1406.1078, 2014.</li>
<li>Zhou J, Xu W. <a href="http://www.aclweb.org/anthology/P/P15/P15-1109.pdf" target="_blank">End-to-end learning of semantic role labeling using recurrent neural networks</a>[C]//Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015.</li>
</ol><p data-anchor-id="cnzl"><br> <br>
<img src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="知识共享许可协议"></p><p data-anchor-id="jbzi">本教程由<a href="http://book.paddlepaddle.org" target="_blank">PaddlePaddle</a>创作,采用<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">知识共享 署名-非商业性使用-相同方式共享 4.0 国际 许可协议</a>进行许可。</p></div>
</body>
</html>
......@@ -528,6 +528,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
<!DOCTYPE html>
<html class="theme theme-white">
<head>
<meta charset="utf-8">
<title>词向量</title>
<link href="https://www.zybuluo.com/static/assets/template-theme-white.css" rel="stylesheet" media="screen">
<style type="text/css">
#wmd-preview h1 {
color: #0077bb; /* 将标题改为蓝色 */
}</style>
</head>
<body class="theme theme-white">
<div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMATHI-65" stroke-width="1" d="M39 168Q39 225 58 272T107 350T174 402T244 433T307 442H310Q355 442 388 420T421 355Q421 265 310 237Q261 224 176 223Q139 223 138 221Q138 219 132 186T125 128Q125 81 146 54T209 26T302 45T394 111Q403 121 406 121Q410 121 419 112T429 98T420 82T390 55T344 24T281 -1T205 -11Q126 -11 83 42T39 168ZM373 353Q367 405 305 405Q272 405 244 391T199 357T170 316T154 280T149 261Q149 260 169 260Q282 260 327 284T373 353Z"></path><path id="MJMATHI-6D" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T88 425T132 442T175 435T205 417T221 395T229 376L231 369Q231 367 232 367L243 378Q303 442 384 442Q401 442 415 440T441 433T460 423T475 411T485 398T493 385T497 373T500 364T502 357L510 367Q573 442 659 442Q713 442 746 415T780 336Q780 285 742 178T704 50Q705 36 709 31T724 26Q752 26 776 56T815 138Q818 149 821 151T837 153Q857 153 857 145Q857 144 853 130Q845 101 831 73T785 17T716 -10Q669 -10 648 17T627 73Q627 92 663 193T700 345Q700 404 656 404H651Q565 404 506 303L499 291L466 157Q433 26 428 16Q415 -11 385 -11Q372 -11 364 -4T353 8T350 18Q350 29 384 161L420 307Q423 322 423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 181Q151 335 151 342Q154 357 154 369Q154 405 129 405Q107 405 92 377T69 316T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-62" stroke-width="1" d="M73 647Q73 657 77 670T89 683Q90 683 161 688T234 694Q246 694 246 685T212 542Q204 508 195 472T180 418L176 399Q176 396 182 402Q231 442 283 442Q345 442 383 396T422 280Q422 169 343 79T173 -11Q123 -11 82 27T40 150V159Q40 180 48 217T97 414Q147 611 147 623T109 637Q104 637 101 637H96Q86 
637 83 637T76 640T73 647ZM336 325V331Q336 405 275 405Q258 405 240 397T207 376T181 352T163 330L157 322L136 236Q114 150 114 114Q114 66 138 42Q154 26 178 26Q211 26 245 58Q270 81 285 114T318 219Q336 291 336 325Z"></path><path id="MJMATHI-64" stroke-width="1" d="M366 683Q367 683 438 688T511 694Q523 694 523 686Q523 679 450 384T375 83T374 68Q374 26 402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487H491Q506 153 506 145Q506 140 503 129Q490 79 473 48T445 8T417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157Q33 205 53 255T101 341Q148 398 195 420T280 442Q336 442 364 400Q369 394 369 396Q370 400 396 505T424 616Q424 629 417 632T378 637H357Q351 643 351 645T353 664Q358 683 366 683ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-6E" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 
366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-67" stroke-width="1" d="M311 43Q296 30 267 15T206 0Q143 0 105 45T66 160Q66 265 143 353T314 442Q361 442 401 394L404 398Q406 401 409 404T418 412T431 419T447 422Q461 422 470 413T480 394Q480 379 423 152T363 -80Q345 -134 286 -169T151 -205Q10 -205 10 -137Q10 -111 28 -91T74 -71Q89 -71 102 -80T116 -111Q116 -121 114 -130T107 -144T99 -154T92 -162L90 -164H91Q101 -167 151 -167Q189 -167 211 -155Q234 -144 254 -122T282 -75Q288 -56 298 -13Q311 35 311 43ZM384 328L380 339Q377 350 375 354T369 368T359 382T346 393T328 402T306 405Q262 405 221 352Q191 313 171 233T151 117Q151 38 213 38Q269 38 323 108L331 118L384 328Z"></path><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path id="MJMAIN-5B" stroke-width="1" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path id="MJMAIN-30" stroke-width="1" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path 
id="MJMAIN-2E" stroke-width="1" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60Z"></path><path id="MJMAIN-33" stroke-width="1" d="M127 463Q100 463 85 480T69 524Q69 579 117 622T233 665Q268 665 277 664Q351 652 390 611T430 522Q430 470 396 421T302 350L299 348Q299 347 308 345T337 336T375 315Q457 262 457 175Q457 96 395 37T238 -22Q158 -22 100 21T42 130Q42 158 60 175T105 193Q133 193 151 175T169 130Q169 119 166 110T159 94T148 82T136 74T126 70T118 67L114 66Q165 21 238 21Q293 21 321 74Q338 107 338 175V195Q338 290 274 322Q259 328 213 329L171 330L168 332Q166 335 166 348Q166 366 174 366Q202 366 232 371Q266 376 294 413T322 525V533Q322 590 287 612Q265 626 240 626Q208 626 181 615T143 592T132 580H135Q138 579 143 578T153 573T165 566T175 555T183 540T186 520Q186 498 172 481T127 463Z"></path><path id="MJMAIN-2C" stroke-width="1" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path id="MJMAIN-34" stroke-width="1" d="M462 0Q444 3 333 3Q217 3 199 0H190V46H221Q241 46 248 46T265 48T279 53T286 61Q287 63 287 115V165H28V211L179 442Q332 674 334 675Q336 677 355 677H373L379 671V211H471V165H379V114Q379 73 379 66T385 54Q393 47 442 46H471V0H462ZM293 211V545L74 212L183 211H293Z"></path><path id="MJMAIN-32" stroke-width="1" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path id="MJMAIN-2212" stroke-width="1" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 
250Z"></path><path id="MJMAIN-31" stroke-width="1" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path id="MJMAIN-35" stroke-width="1" d="M164 157Q164 133 148 117T109 101H102Q148 22 224 22Q294 22 326 82Q345 115 345 210Q345 313 318 349Q292 382 260 382H254Q176 382 136 314Q132 307 129 306T114 304Q97 304 95 310Q93 314 93 485V614Q93 664 98 664Q100 666 102 666Q103 666 123 658T178 642T253 634Q324 634 389 662Q397 666 402 666Q410 666 410 648V635Q328 538 205 538Q174 538 149 544L139 546V374Q158 388 169 396T205 412T256 420Q337 420 393 355T449 201Q449 109 385 44T229 -22Q148 -22 99 32T50 154Q50 178 61 192T84 210T107 214Q132 214 148 197T164 157Z"></path><path id="MJMAIN-5D" stroke-width="1" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path><path id="MJMAIN-36" stroke-width="1" d="M42 313Q42 476 123 571T303 666Q372 666 402 630T432 550Q432 525 418 510T379 495Q356 495 341 509T326 548Q326 592 373 601Q351 623 311 626Q240 626 194 566Q147 500 147 364L148 360Q153 366 156 373Q197 433 263 433H267Q313 433 348 414Q372 400 396 374T435 317Q456 268 456 210V192Q456 169 451 149Q440 90 387 34T253 -22Q225 -22 199 -14T143 16T92 75T56 172T42 313ZM257 397Q227 397 205 380T171 335T154 278T148 216Q148 133 160 97T198 39Q222 21 251 21Q302 21 329 59Q342 77 347 104T352 209Q352 289 347 316T329 361Q302 397 257 397Z"></path><path id="MJMATHI-58" stroke-width="1" d="M42 0H40Q26 0 26 11Q26 15 29 27Q33 41 36 43T55 46Q141 49 190 98Q200 108 306 224T411 342Q302 620 297 625Q288 636 234 637H206Q200 643 200 645T202 664Q206 677 212 683H226Q260 681 347 681Q380 681 408 681T453 682T473 682Q490 682 490 671Q490 670 488 658Q484 643 481 640T465 637Q434 634 411 620L488 426L541 485Q646 598 646 610Q646 628 622 635Q617 635 609 637Q594 637 594 648Q594 650 596 664Q600 677 606 683H618Q619 683 643 683T697 681T738 
680Q828 680 837 683H845Q852 676 852 672Q850 647 840 637H824Q790 636 763 628T722 611T698 593L687 584Q687 585 592 480L505 384Q505 383 536 304T601 142T638 56Q648 47 699 46Q734 46 734 37Q734 35 732 23Q728 7 725 4T711 1Q708 1 678 1T589 2Q528 2 496 2T461 1Q444 1 444 10Q444 11 446 25Q448 35 450 39T455 44T464 46T480 47T506 54Q523 62 523 64Q522 64 476 181L429 299Q241 95 236 84Q232 76 232 72Q232 53 261 47Q262 47 267 47T273 46Q276 46 277 46T280 45T283 42T284 35Q284 26 282 19Q279 6 276 4T261 1Q258 1 243 1T201 2T142 2Q64 2 42 0Z"></path><path id="MJMAIN-7C" stroke-width="1" d="M139 -249H137Q125 -249 119 -235V251L120 737Q130 750 139 750Q152 750 159 735V-235Q151 -249 141 -249H139Z"></path><path id="MJMATHI-56" stroke-width="1" d="M52 648Q52 670 65 683H76Q118 680 181 680Q299 680 320 683H330Q336 677 336 674T334 656Q329 641 325 637H304Q282 635 274 635Q245 630 242 620Q242 618 271 369T301 118L374 235Q447 352 520 471T595 594Q599 601 599 609Q599 633 555 637Q537 637 537 648Q537 649 539 661Q542 675 545 679T558 683Q560 683 570 683T604 682T668 681Q737 681 755 683H762Q769 676 769 672Q769 655 760 640Q757 637 743 637Q730 636 719 635T698 630T682 623T670 615T660 608T652 599T645 592L452 282Q272 -9 266 -16Q263 -18 259 -21L241 -22H234Q216 -22 216 -15Q213 -9 177 305Q139 623 138 626Q133 637 76 637H59Q52 642 52 648Z"></path><path id="MJMAIN-D7" stroke-width="1" d="M630 29Q630 9 609 9Q604 9 587 25T493 118L389 222L284 117Q178 13 175 11Q171 9 168 9Q160 9 154 15T147 29Q147 36 161 51T255 146L359 250L255 354Q174 435 161 449T147 471Q147 480 153 485T168 490Q173 490 175 489Q178 487 284 383L389 278L493 382Q570 459 587 475T609 491Q630 491 630 471Q630 464 620 453T522 355L418 250L522 145Q606 61 618 48T630 29Z"></path><path id="MJMATHI-6A" stroke-width="1" d="M297 596Q297 627 318 644T361 661Q378 661 389 651T403 623Q403 595 384 576T340 557Q322 557 310 567T297 596ZM288 376Q288 405 262 405Q240 405 220 393T185 362T161 325T144 293L137 279Q135 278 121 278H107Q101 284 101 286T105 299Q126 348 164 391T252 441Q253 441 260 
441T272 442Q296 441 316 432Q341 418 354 401T367 348V332L318 133Q267 -67 264 -75Q246 -125 194 -164T75 -204Q25 -204 7 -183T-12 -137Q-12 -110 7 -91T53 -71Q70 -71 82 -81T95 -112Q95 -148 63 -167Q69 -168 77 -168Q111 -168 139 -140T182 -74L193 -32Q204 11 219 72T251 197T278 308T289 365Q289 372 288 376Z"></path><path id="MJMATHI-55" stroke-width="1" d="M107 637Q73 637 71 641Q70 643 70 649Q70 673 81 682Q83 683 98 683Q139 681 234 681Q268 681 297 681T342 682T362 682Q378 682 378 672Q378 670 376 658Q371 641 366 638H364Q362 638 359 638T352 638T343 637T334 637Q295 636 284 634T266 623Q265 621 238 518T184 302T154 169Q152 155 152 140Q152 86 183 55T269 24Q336 24 403 69T501 205L552 406Q599 598 599 606Q599 633 535 637Q511 637 511 648Q511 650 513 660Q517 676 519 679T529 683Q532 683 561 682T645 680Q696 680 723 681T752 682Q767 682 767 672Q767 650 759 642Q756 637 737 637Q666 633 648 597Q646 592 598 404Q557 235 548 205Q515 105 433 42T263 -22Q171 -22 116 34T60 167V183Q60 201 115 421Q164 622 164 628Q164 635 107 637Z"></path><path id="MJMATHI-53" stroke-width="1" d="M308 24Q367 24 416 76T466 197Q466 260 414 284Q308 311 278 321T236 341Q176 383 176 462Q176 523 208 573T273 648Q302 673 343 688T407 704H418H425Q521 704 564 640Q565 640 577 653T603 682T623 704Q624 704 627 704T632 705Q645 705 645 698T617 577T585 459T569 456Q549 456 549 465Q549 471 550 475Q550 478 551 494T553 520Q553 554 544 579T526 616T501 641Q465 662 419 662Q362 662 313 616T263 510Q263 480 278 458T319 427Q323 425 389 408T456 390Q490 379 522 342T554 242Q554 216 546 186Q541 164 528 137T492 78T426 18T332 -20Q320 -22 298 -22Q199 -22 144 33L134 44L106 13Q83 -14 78 -18T65 -22Q52 -22 52 -14Q52 -11 110 221Q112 227 130 227H143Q149 221 149 216Q149 214 148 207T144 186T142 153Q144 114 160 87T203 47T255 29T308 24Z"></path><path id="MJMATHI-54" stroke-width="1" d="M40 437Q21 437 21 445Q21 450 37 501T71 602L88 651Q93 669 101 677H569H659Q691 677 697 676T704 667Q704 661 687 553T668 444Q668 437 649 437Q640 437 637 437T631 442L629 445Q629 451 635 490T641 
551Q641 586 628 604T573 629Q568 630 515 631Q469 631 457 630T439 622Q438 621 368 343T298 60Q298 48 386 46Q418 46 427 45T436 36Q436 31 433 22Q429 4 424 1L422 0Q419 0 415 0Q410 0 363 1T228 2Q99 2 64 0H49Q43 6 43 9T45 27Q49 40 55 46H83H94Q174 46 189 55Q190 56 191 56Q196 59 201 76T241 233Q258 301 269 344Q339 619 339 625Q339 630 310 630H279Q212 630 191 624Q146 614 121 583T67 467Q60 445 57 441T43 437H40Z"></path><path id="MJMAIN-2217" stroke-width="1" d="M229 286Q216 420 216 436Q216 454 240 464Q241 464 245 464T251 465Q263 464 273 456T283 436Q283 419 277 356T270 286L328 328Q384 369 389 372T399 375Q412 375 423 365T435 338Q435 325 425 315Q420 312 357 282T289 250L355 219L425 184Q434 175 434 161Q434 146 425 136T401 125Q393 125 383 131T328 171L270 213Q283 79 283 63Q283 53 276 44T250 35Q231 35 224 44T216 63Q216 80 222 143T229 213L171 171Q115 130 110 127Q106 124 100 124Q87 124 76 134T64 161Q64 166 64 169T67 175T72 181T81 188T94 195T113 204T138 215T170 230T210 250L74 315Q65 324 65 338Q65 353 74 363T98 374Q106 374 116 368T171 328L229 286Z"></path><path id="MJMATHI-77" stroke-width="1" d="M580 385Q580 406 599 424T641 443Q659 443 674 425T690 368Q690 339 671 253Q656 197 644 161T609 80T554 12T482 -11Q438 -11 404 5T355 48Q354 47 352 44Q311 -11 252 -11Q226 -11 202 -5T155 14T118 53T104 116Q104 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Q21 293 29 315T52 366T96 418T161 441Q204 441 227 416T250 358Q250 340 217 250T184 111Q184 65 205 46T258 26Q301 26 334 87L339 96V119Q339 122 339 128T340 136T341 143T342 152T345 165T348 182T354 206T362 238T373 281Q402 395 406 404Q419 431 449 431Q468 431 475 421T483 402Q483 389 454 274T422 142Q420 131 420 107V100Q420 85 423 71T442 42T487 26Q558 26 600 148Q609 171 620 213T632 273Q632 306 619 325T593 357T580 385Z"></path><path id="MJMATHI-74" stroke-width="1" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 
586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 385H26Z"></path><path id="MJMAIN-2B" stroke-width="1" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path id="MJMATHI-43" stroke-width="1" d="M50 252Q50 367 117 473T286 641T490 704Q580 704 633 653Q642 643 648 636T656 626L657 623Q660 623 684 649Q691 655 699 663T715 679T725 690L740 705H746Q760 705 760 698Q760 694 728 561Q692 422 692 421Q690 416 687 415T669 413H653Q647 419 647 422Q647 423 648 429T650 449T651 481Q651 552 619 605T510 659Q484 659 454 652T382 628T299 572T226 479Q194 422 175 346T156 222Q156 108 232 58Q280 24 350 24Q441 24 512 92T606 240Q610 253 612 255T628 257Q648 257 648 248Q648 243 647 239Q618 132 523 55T319 -22Q206 -22 128 53T50 252Z"></path><path id="MJMATHI-61" stroke-width="1" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"></path><path id="MJMATHI-68" stroke-width="1" d="M137 683Q138 683 209 688T282 694Q294 694 294 685Q294 674 258 534Q220 386 220 383Q220 381 227 388Q288 442 357 442Q411 442 444 415T478 336Q478 285 440 178T402 50Q403 36 407 31T422 26Q450 26 474 56T513 
138Q516 149 519 151T535 153Q555 153 555 145Q555 144 551 130Q535 71 500 33Q466 -10 419 -10H414Q367 -10 346 17T325 74Q325 90 361 192T398 345Q398 404 354 404H349Q266 404 205 306L198 293L164 158Q132 28 127 16Q114 -11 83 -11Q69 -11 59 -2T48 16Q48 30 121 320L195 616Q195 629 188 632T149 637H128Q122 643 122 645T124 664Q129 683 137 683Z"></path><path id="MJMATHI-3B8" stroke-width="1" d="M35 200Q35 302 74 415T180 610T319 704Q320 704 327 704T339 705Q393 701 423 656Q462 596 462 495Q462 380 417 261T302 66T168 -10H161Q125 -10 99 10T60 63T41 130T35 200ZM383 566Q383 668 330 668Q294 668 260 623T204 521T170 421T157 371Q206 370 254 370L351 371Q352 372 359 404T375 484T383 566ZM113 132Q113 26 166 26Q181 26 198 36T239 74T287 161T335 307L340 324H145Q145 321 136 286T120 208T113 132Z"></path><path id="MJMATHI-78" stroke-width="1" d="M52 289Q59 331 106 386T222 442Q257 442 286 424T329 379Q371 442 430 442Q467 442 494 420T522 361Q522 332 508 314T481 292T458 288Q439 288 427 299T415 328Q415 374 465 391Q454 404 425 404Q412 404 406 402Q368 386 350 336Q290 115 290 78Q290 50 306 38T341 26Q378 26 414 59T463 140Q466 150 469 151T485 153H489Q504 153 504 145Q504 144 502 134Q486 77 440 33T333 -11Q263 -11 227 52Q186 -10 133 -10H127Q78 -10 57 16T35 71Q35 103 54 123T99 143Q142 143 142 101Q142 81 130 66T107 46T94 41L91 40Q91 39 97 36T113 29T132 26Q168 26 194 71Q203 87 217 139T245 247T261 313Q266 340 266 352Q266 380 251 392T217 404Q177 404 142 372T93 290Q91 281 88 280T72 278H58Q52 284 52 289Z"></path><path id="MJMATHI-57" stroke-width="1" d="M436 683Q450 683 486 682T553 680Q604 680 638 681T677 682Q695 682 695 674Q695 670 692 659Q687 641 683 639T661 637Q636 636 621 632T600 624T597 615Q597 603 613 377T629 138L631 141Q633 144 637 151T649 170T666 200T690 241T720 295T759 362Q863 546 877 572T892 604Q892 619 873 628T831 637Q817 637 817 647Q817 650 819 660Q823 676 825 679T839 682Q842 682 856 682T895 682T949 681Q1015 681 1034 683Q1048 683 1048 672Q1048 666 1045 655T1038 640T1028 637Q1006 637 988 631T958 617T939 600T927 
584L923 578L754 282Q586 -14 585 -15Q579 -22 561 -22Q546 -22 542 -17Q539 -14 523 229T506 480L494 462Q472 425 366 239Q222 -13 220 -15T215 -19Q210 -22 197 -22Q178 -22 176 -15Q176 -12 154 304T131 622Q129 631 121 633T82 637H58Q51 644 51 648Q52 671 64 683H76Q118 680 176 680Q301 680 313 683H323Q329 677 329 674T327 656Q322 641 318 637H297Q236 634 232 620Q262 160 266 136L501 550L499 587Q496 629 489 632Q483 636 447 637Q428 637 422 639T416 648Q416 650 418 660Q419 664 420 669T421 676T424 680T428 682T436 683Z"></path><path id="MJMATHI-50" stroke-width="1" d="M287 628Q287 635 230 637Q206 637 199 638T192 648Q192 649 194 659Q200 679 203 681T397 683Q587 682 600 680Q664 669 707 631T751 530Q751 453 685 389Q616 321 507 303Q500 302 402 301H307L277 182Q247 66 247 59Q247 55 248 54T255 50T272 48T305 46H336Q342 37 342 35Q342 19 335 5Q330 0 319 0Q316 0 282 1T182 2Q120 2 87 2T51 1Q33 1 33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM645 554Q645 567 643 575T634 597T609 619T560 635Q553 636 480 637Q463 637 445 637T416 636T404 636Q391 635 386 627Q384 621 367 550T332 412T314 344Q314 342 395 342H407H430Q542 342 590 392Q617 419 631 471T645 554Z"></path><path id="MJSZ1-2211" stroke-width="1" d="M61 748Q64 750 489 750H913L954 640Q965 609 976 579T993 533T999 516H979L959 517Q936 579 886 621T777 682Q724 700 655 705T436 710H319Q183 710 183 709Q186 706 348 484T511 259Q517 250 513 244L490 216Q466 188 420 134T330 27L149 -187Q149 -188 362 -188Q388 -188 436 -188T506 -189Q679 -189 778 -162T936 -43Q946 -27 959 6H999L913 -249L489 -250Q65 -250 62 -248Q56 -246 56 -239Q56 -234 118 -161Q186 -81 245 -11L428 206Q428 207 242 462L57 717L56 728Q56 744 61 748Z"></path><path id="MJMATHI-4A" stroke-width="1" d="M447 625Q447 637 354 637H329Q323 642 323 645T325 664Q329 677 335 683H352Q393 681 498 681Q541 681 568 681T605 682T619 682Q633 682 633 672Q633 670 630 658Q626 642 623 640T604 637Q552 637 545 623Q541 610 483 376Q420 128 419 127Q397 64 333 21T195 -22Q137 -22 97 8T57 88Q57 130 80 
152T132 174Q177 174 182 130Q182 98 164 80T123 56Q115 54 115 53T122 44Q148 15 197 15Q235 15 271 47T324 130Q328 142 387 380T447 625Z"></path><path id="MJSZ2-2211" stroke-width="1" d="M60 948Q63 950 665 950H1267L1325 815Q1384 677 1388 669H1348L1341 683Q1320 724 1285 761Q1235 809 1174 838T1033 881T882 898T699 902H574H543H251L259 891Q722 258 724 252Q725 250 724 246Q721 243 460 -56L196 -356Q196 -357 407 -357Q459 -357 548 -357T676 -358Q812 -358 896 -353T1063 -332T1204 -283T1307 -196Q1328 -170 1348 -124H1388Q1388 -125 1381 -145T1356 -210T1325 -294L1267 -449L666 -450Q64 -450 61 -448Q55 -446 55 -439Q55 -437 57 -433L590 177Q590 178 557 222T452 366T322 544L56 909L55 924Q55 945 60 948Z"></path><path id="MJMATHI-4E" stroke-width="1" d="M234 637Q231 637 226 637Q201 637 196 638T191 649Q191 676 202 682Q204 683 299 683Q376 683 387 683T401 677Q612 181 616 168L670 381Q723 592 723 606Q723 633 659 637Q635 637 635 648Q635 650 637 660Q641 676 643 679T653 683Q656 683 684 682T767 680Q817 680 843 681T873 682Q888 682 888 672Q888 650 880 642Q878 637 858 637Q787 633 769 597L620 7Q618 0 599 0Q585 0 582 2Q579 5 453 305L326 604L261 344Q196 88 196 79Q201 46 268 46H278Q284 41 284 38T282 19Q278 6 272 0H259Q228 2 151 2Q123 2 100 2T63 2T46 1Q31 1 31 10Q31 14 34 26T39 40Q41 46 62 46Q130 49 150 85Q154 91 221 362L289 634Q287 635 234 637Z"></path><path id="MJMATHI-63" stroke-width="1" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path id="MJMATHI-79" stroke-width="1" d="M21 287Q21 301 36 335T84 406T158 442Q199 442 224 419T250 355Q248 336 247 334Q247 331 231 288T198 191T182 105Q182 62 196 45T238 27Q261 27 281 38T312 61T339 94Q339 95 344 114T358 173T377 247Q415 397 419 404Q432 431 462 
431Q475 431 483 424T494 412T496 403Q496 390 447 193T391 -23Q363 -106 294 -155T156 -205Q111 -205 77 -183T43 -117Q43 -95 50 -80T69 -58T89 -48T106 -45Q150 -45 150 -87Q150 -107 138 -122T115 -142T102 -147L99 -148Q101 -153 118 -160T152 -167H160Q177 -167 186 -165Q219 -156 247 -127T290 -65T313 -9T321 21L315 17Q309 13 296 6T270 -6Q250 -11 231 -11Q185 -11 150 11T104 82Q103 89 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-6B" stroke-width="1" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path><path id="MJMATHI-6C" stroke-width="1" d="M117 59Q117 26 142 26Q179 26 205 131Q211 151 215 152Q217 153 225 153H229Q238 153 241 153T246 151T248 144Q247 138 245 128T234 90T214 43T183 6T137 -11Q101 -11 70 11T38 85Q38 97 39 102L104 360Q167 615 167 623Q167 626 166 628T162 632T157 634T149 635T141 636T132 637T122 637Q112 637 109 637T101 638T95 641T94 647Q94 649 96 661Q101 680 107 682T179 688Q194 689 213 690T243 693T254 694Q266 694 266 686Q266 675 193 386T118 83Q118 81 118 75T117 65V59Z"></path><path id="MJMATHI-6F" stroke-width="1" d="M201 -11Q126 -11 80 38T34 156Q34 221 64 279T146 380Q222 441 301 441Q333 441 341 440Q354 437 367 433T402 
417T438 387T464 338T476 268Q476 161 390 75T201 -11ZM121 120Q121 70 147 48T206 26Q250 26 289 58T351 142Q360 163 374 216T388 308Q388 352 370 375Q346 405 306 405Q243 405 195 347Q158 303 140 230T121 120Z"></path><path id="MJMATHI-73" stroke-width="1" d="M131 289Q131 321 147 354T203 415T300 442Q362 442 390 415T419 355Q419 323 402 308T364 292Q351 292 340 300T328 326Q328 342 337 354T354 372T367 378Q368 378 368 379Q368 382 361 388T336 399T297 405Q249 405 227 379T204 326Q204 301 223 291T278 274T330 259Q396 230 396 163Q396 135 385 107T352 51T289 7T195 -10Q118 -10 86 19T53 87Q53 126 74 143T118 160Q133 160 146 151T160 120Q160 94 142 76T111 58Q109 57 108 57T107 55Q108 52 115 47T146 34T201 27Q237 27 263 38T301 66T318 97T323 122Q323 150 302 164T254 181T195 196T148 231Q131 256 131 289Z"></path><path id="MJMATHI-66" stroke-width="1" d="M118 -162Q120 -162 124 -164T135 -167T147 -168Q160 -168 171 -155T187 -126Q197 -99 221 27T267 267T289 382V385H242Q195 385 192 387Q188 390 188 397L195 425Q197 430 203 430T250 431Q298 431 298 432Q298 434 307 482T319 540Q356 705 465 705Q502 703 526 683T550 630Q550 594 529 578T487 561Q443 561 443 603Q443 622 454 636T478 657L487 662Q471 668 457 668Q445 668 434 658T419 630Q412 601 403 552T387 469T380 433Q380 431 435 431Q480 431 487 430T498 424Q499 420 496 407T491 391Q489 386 482 386T428 385H372L349 263Q301 15 282 -47Q255 -132 212 -173Q175 -205 139 -205Q107 -205 81 -186T55 -132Q55 -95 76 -78T118 -61Q162 -61 162 -103Q162 -122 151 -136T127 -157L118 -162Z"></path><path id="MJMATHI-7A" stroke-width="1" d="M347 338Q337 338 294 349T231 360Q211 360 197 356T174 346T162 335T155 324L153 320Q150 317 138 317Q117 317 117 325Q117 330 120 339Q133 378 163 406T229 440Q241 442 246 442Q271 442 291 425T329 392T367 375Q389 375 411 408T434 441Q435 442 449 442H462Q468 436 468 434Q468 430 463 420T449 399T432 377T418 358L411 349Q368 298 275 214T160 106L148 94L163 93Q185 93 227 82T290 71Q328 71 360 90T402 140Q406 149 409 151T424 153Q443 153 443 143Q443 138 442 134Q425 72 376 31T278 
-11Q252 -11 232 6T193 40T155 57Q111 57 76 -3Q70 -11 59 -11H54H41Q35 -5 35 -2Q35 13 93 84Q132 129 225 214T340 322Q352 338 347 338Z"></path><path id="MJMATHI-44" stroke-width="1" d="M287 628Q287 635 230 637Q207 637 200 638T193 647Q193 655 197 667T204 682Q206 683 403 683Q570 682 590 682T630 676Q702 659 752 597T803 431Q803 275 696 151T444 3L430 1L236 0H125H72Q48 0 41 2T33 11Q33 13 36 25Q40 41 44 43T67 46Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628ZM703 469Q703 507 692 537T666 584T629 613T590 629T555 636Q553 636 541 636T512 636T479 637H436Q392 637 386 627Q384 623 313 339T242 52Q242 48 253 48T330 47Q335 47 349 47T373 46Q499 46 581 128Q617 164 640 212T683 339T703 469Z"></path><path id="MJSZ2-220F" stroke-width="1" d="M220 812Q220 813 218 819T214 829T208 840T199 853T185 866T166 878T140 887T107 893T66 896H56V950H1221V896H1211Q1080 896 1058 812V-311Q1076 -396 1211 -396H1221V-450H725V-396H735Q864 -396 888 -314Q889 -312 889 -311V896H388V292L389 -311Q405 -396 542 -396H552V-450H56V-396H66Q195 -396 219 -314Q220 -312 220 -311V812Z"></path><path id="MJMAIN-3B" stroke-width="1" d="M78 370Q78 394 95 412T138 430Q162 430 180 414T199 371Q199 346 182 328T139 310T96 327T78 370ZM78 60Q78 85 94 103T137 121Q202 121 202 8Q202 -44 183 -94T144 -169T118 -194Q115 -194 106 -186T95 -174Q94 -171 107 -155T137 -107T160 -38Q161 -32 162 -22T165 -4T165 4Q165 5 161 4T142 0Q110 0 94 18T78 60Z"></path><path id="MJMATHI-52" stroke-width="1" d="M230 637Q203 637 198 638T193 649Q193 676 204 682Q206 683 378 683Q550 682 564 680Q620 672 658 652T712 606T733 563T739 529Q739 484 710 445T643 385T576 351T538 338L545 333Q612 295 612 223Q612 212 607 162T602 80V71Q602 53 603 43T614 25T640 16Q668 16 686 38T712 85Q717 99 720 102T735 105Q755 105 755 93Q755 75 731 36Q693 -21 641 -21H632Q571 -21 531 4T487 82Q487 109 502 166T517 239Q517 290 474 313Q459 320 449 321T378 323H309L277 193Q244 61 244 59Q244 55 245 54T252 50T269 48T302 46H333Q339 38 339 37T336 19Q332 6 326 0H311Q275 2 180 2Q146 2 117 2T71 2T50 1Q33 1 33 10Q33 12 
36 24Q41 43 46 45Q50 46 61 46H67Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628Q287 635 230 637ZM630 554Q630 586 609 608T523 636Q521 636 500 636T462 637H440Q393 637 386 627Q385 624 352 494T319 361Q319 360 388 360Q466 361 492 367Q556 377 592 426Q608 449 619 486T630 554Z"></path></defs></svg></div><div id="wmd-preview" class="wmd-preview wmd-preview-full-reader"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="dhit" id="词向量">词向量</h1><div class="md-section-divider"></div><h2 data-anchor-id="m18t" id="背景介绍">背景介绍</h2><p data-anchor-id="wdbq">本章我们介绍词的向量表征,也称为word embedding。词向量是自然语言处理中常见的一个操作,是搜索引擎、广告系统、推荐系统等互联网服务背后常见的基础技术。</p><p data-anchor-id="fspi">在这些互联网服务里,我们经常要比较两个词或者两段文本之间的相关性。为了做这样的比较,我们往往先要把词表示成计算机适合处理的方式。最自然的方式恐怕莫过于向量空间模型(vector space model)。 <br>
在这种方式里,每个词被表示成一个实数向量(one-hot vector),其长度为字典大小,每个维度对应字典里的一个词,除了这个词对应维度上的值是1,其他元素都是0。</p><p data-anchor-id="ptyd">One-hot vector虽然自然,但是用处有限。比如,在互联网广告系统里,如果用户输入的query是“母亲节”,而有一个广告的关键词是“康乃馨”。虽然按照常理,我们知道这两个词之间是有联系的——母亲节通常应该送给母亲一束康乃馨;但是这两个词对应的one-hot vectors之间的距离度量,无论是欧氏距离还是余弦相似度(cosine similarity),由于其向量正交,都认为这两个词毫无相关性。 得出这种与我们的直觉相悖的结论的根本原因是:每个词本身的信息量都太小。所以,仅仅给定两个词,不足以让我们准确判别它们是否相关。要想精确计算相关性,我们还需要更多的信息——从大量数据里通过机器学习方法归纳出来的知识。</p><p data-anchor-id="4ldn">在机器学习领域里,各种“知识”被各种模型表示,词向量模型(word embedding model)就是其中的一类。通过词向量模型可将一个 one-hot vector映射到一个维度更低的实数向量(embedding vector),如<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-1-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -881.1077362650129 34573.229850654214 1152.1594216130375" style="width: 80.27ex; height: 2.664ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="466" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="1345" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="1774" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="2241" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="2764" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="3288" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="3633" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67" x="4234" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="4714" y="0"></use><g transform="translate(5104,0)"><text 
font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)">母</text></g><g transform="translate(5901,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)">亲</text></g><g transform="translate(6698,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)">节</text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="7495" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="8162" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B" x="9219" y="0"></use><g transform="translate(9497,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="779" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="10777" y="0"></use><g transform="translate(11222,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-34"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="779" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="12501" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="12946" y="0"></use><g transform="translate(13725,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-35" x="779" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="15004" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="15450" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="15895" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="16340" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="16785" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="17064" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="17509" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="17975" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62" x="18854" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="19283" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="19750" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-64" x="20273" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="20797" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="21142" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67" x="21743" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="22223" y="0"></use><g transform="translate(22613,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)">康</text></g><g transform="translate(23410,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)">乃</text></g><g 
transform="translate(24207,0)"><text font-family="STIXGeneral,'Arial Unicode MS',serif" font-style="" font-weight="" stroke="none" transform="scale(49.87111969111969) matrix(1 0 0 -1 0 0)">馨</text></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="25004" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="25671" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B" x="26728" y="0"></use><g transform="translate(27006,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="779" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="28286" y="0"></use><g transform="translate(28731,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-35"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-36" x="779" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="30010" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="30456" y="0"></use><g transform="translate(31234,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="500" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-33" x="779" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="32514" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="32959" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="33404" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-2E" x="33849" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="34294" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-1">embedding(母亲节) = [0.3, 4.2, -1.5, ...], embedding(康乃馨) = [0.2, 5.6, -2.3, ...]</script>。在这个映射到的实数向量表示中,希望两个语义(或用法)上相似的词对应的词向量“更像”,这样如“母亲节”和“康乃馨”的对应词向量的余弦相似度就不再为零了。</p><p data-anchor-id="o9kt">词向量模型可以是概率模型、共生矩阵(co-occurrence matrix)模型或神经元网络模型。在用神经网络求词向量之前,传统做法是统计一个词语的共生矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-2-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-2">X</script>。<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-3-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-3">X</script>是一个<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-4-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 3875.9444444444443 1041.103370696049" style="width: 9.035ex; height: 
2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="278" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1048" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-D7" x="1548" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="2549" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="2827" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="3597" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-4">|V| \times |V|</script> 大小的矩阵,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-5-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1464.486940139403 1019.0602608392912" style="width: 3.359ex; height: 2.317ex; vertical-align: -0.811ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><g transform="translate(828,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6A" x="345" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-5">X_{ij}</script>表示在所有语料中,词汇表<code>V</code>(vocabulary)中第i个词和第j个词同时出现的次数,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-6-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: 
inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 1326.5 1041.103370696049" style="width: 3.127ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="278" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1048" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-6">|V|</script>为词汇表的大小。对<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-7-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 852.5 725.103370696049" style="width: 1.969ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use></g></svg></span><script type="math/tex" id="MathJax-Element-7">X</script>做矩阵分解(如奇异值分解,Singular Value Decomposition [<a href="#参考文献">5</a>]),得到的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-8-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 767.5 747.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-55"></use></g></svg></span><script type="math/tex" id="MathJax-Element-8">U</script>即视为所有词的词向量:</p><div class="md-section-divider"></div><p data-anchor-id="5ptg"><span 
class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-9-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -912.3620829925038 5023.062282901478 955.4137683405282" style="width: 11.699ex; height: 2.201ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-58"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1130" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-55" x="2186" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-53" x="2954" y="0"></use><g transform="translate(3599,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="1167" y="583"></use></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-9">X = USV^T</script></p><p data-anchor-id="1duu">但这样的传统做法有很多问题:<br> <br>
1) 由于很多词没有出现,导致矩阵极其稀疏,因此需要对词频做额外处理来达到好的矩阵分解效果;<br> <br>
2) 矩阵非常大,维度太高(通常达到<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-10-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -885.6723233290762 3854.758332412179 928.7240086771006" style="width: 8.919ex; height: 2.201ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="500" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-36" x="1415" y="557"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2217" x="1677" y="0"></use><g transform="translate(2399,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-30" x="500" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-36" x="1415" y="557"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-10">10^6*10^6</script>的数量级);<br> <br>
3) 需要手动去掉停用词(如although, a,...),不然这些频繁出现的词也会影响矩阵分解的效果。</p><p data-anchor-id="py8y">基于神经网络的模型不需要计算和存储一个在全语料上统计的大表,而是通过学习语义信息得到词向量,因此能很好地解决以上问题。在本章里,我们将展示基于神经网络训练词向量的细节,以及如何用PaddlePaddle训练一个词向量模型。</p><div class="md-section-divider"></div><h2 data-anchor-id="55gr" id="效果展示">效果展示</h2><p data-anchor-id="3g8o">本章中,当词向量训练好后,我们可以用数据可视化算法t-SNE[<a href="#参考文献">4</a>]画出词语特征在二维上的投影(如下图所示)。从图中可以看出,语义相关的词语(如a, the, these; big, huge)在投影上距离很近,语义无关的词(如say, business; decision, japan)在投影上的距离很远。</p><p align="center" data-anchor-id="0r21">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/word2vec/image/2d_similarity.png" alt="词向量的二维投影" width="400"><br>
图1. 词向量的二维投影
</p><p data-anchor-id="gfle">另一方面,我们知道两个向量的余弦值在<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-11-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 2781.666666666667 1042.103370696049" style="width: 6.486ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="278" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1057" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1557" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2002" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="2503" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-11">[-1,1]</script>的区间内:两个完全相同的向量余弦值为1, 两个相互垂直的向量之间余弦值为0,两个方向完全相反的向量余弦值为-1,即相关性和余弦值大小成正比。因此我们还可以计算两个词向量的余弦相似度:</p><pre data-anchor-id="db1w"><code>please input two words: big huge
similarity: 0.899180685161
please input two words: from company
similarity: -0.0997506977351
</code></pre><p data-anchor-id="9gdo">以上结果可以通过运行<code>calculate_dis.py</code>, 加载字典里的单词和对应训练特征结果得到,我们将在<a href="#应用模型">应用模型</a>中详细描述用法。</p><div class="md-section-divider"></div><h2 data-anchor-id="9dax" id="模型概览">模型概览</h2><p data-anchor-id="opsy">在这里我们介绍三个训练词向量的模型:N-gram模型,CBOW模型和Skip-gram模型,它们的中心思想都是通过上下文得到一个词出现的概率。对于N-gram模型,我们会先介绍语言模型的概念,并在之后的<a href="#训练模型">训练模型</a>中,带大家用PaddlePaddle实现它。而后两个模型,是近年来最有名的神经元词向量模型,由 Tomas Mikolov 在Google 研发[<a href="#参考文献">3</a>],虽然它们很浅很简单,但训练效果很好。</p><div class="md-section-divider"></div><h3 data-anchor-id="ws9a" id="语言模型">语言模型</h3><p data-anchor-id="c4ld">在介绍词向量模型之前,我们先来引入一个概念:语言模型。 <br>
语言模型旨在为语句的联合概率函数<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-147-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 6241.397004663122 1042.103370696049" style="width: 14.479ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><g transform="translate(1141,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2311" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="2756" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3201" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3646" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4092" y="0"></use><g transform="translate(4537,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5851" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-147">P(w_1, ..., w_T)</script>建模, 其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-148-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 1060.805392899952 642.5886520702876" style="width: 2.432ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="1013" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-148">w_i</script>表示句子中的第i个词。语言模型的目标是,希望模型对有意义的句子赋予大概率,对没意义的句子赋予小概率。 <br>
这样的模型可以应用于很多领域,如机器翻译、语音识别、信息检索、词性标注、手写识别等,它们都希望能得到一个连续序列的概率。 以信息检索为例,当你在搜索“how long is a football bame”时(bame是一个医学名词),搜索引擎会提示你是否希望搜索“how long is a football game”, 这是因为根据语言模型计算出“how long is a football bame”的概率很低,而与bame近似的,可能引起错误的词中,game会使该句生成的概率最大。</p><p data-anchor-id="rc88">对语言模型的目标概率<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-149-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 6241.397004663122 1042.103370696049" style="width: 14.479ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><g transform="translate(1141,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2311" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="2756" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3201" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3646" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4092" y="0"></use><g transform="translate(4537,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5851" y="0"></use></g></svg></span><script 
type="math/tex" id="MathJax-Element-149">P(w_1, ..., w_T)</script>,如果假设文本中每个词都是相互独立的,则整句话的联合概率可以表示为其中所有词语概率的乘积,即:</p><div class="md-section-divider"></div><p data-anchor-id="dfb9"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-150-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1750.4700829925036 11623.23832828428 2968.313272766194" style="width: 26.988ex; height: 6.95ex; vertical-align: -2.896ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><g transform="translate(1141,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2311" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="2756" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3201" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3646" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4092" y="0"></use><g transform="translate(4537,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5851" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-3D" x="6519" y="0"></use><g transform="translate(7575,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-220F"></use><g transform="translate(59,-1090)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="551" y="1627"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50" x="9020" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="9772" y="0"></use><g transform="translate(10161,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="11233" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-150">P(w_1, ..., w_T) = \prod_{t=1}^TP(w_t)</script></p><p data-anchor-id="yhkz">然而我们知道语句中的每个词出现的概率都与其前面的词紧密相关, 所以实际上通常用条件概率表示语言模型:</p><div class="md-section-divider"></div><p data-anchor-id="12am"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-151-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1750.4700829925036 17274.487280138008 2968.313272766194" style="width: 40.077ex; height: 6.95ex; vertical-align: -2.896ex; margin: 1px 0px;"><g stroke="black" fill="black" 
stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><g transform="translate(1141,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2311" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="2756" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3201" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3646" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4092" y="0"></use><g transform="translate(4537,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5851" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="6519" y="0"></use><g transform="translate(7575,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-220F"></use><g transform="translate(59,-1090)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-54" x="551" y="1627"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50" x="9020" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="9772" y="0"></use><g transform="translate(10161,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="11233" y="0"></use><g transform="translate(11512,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="12682" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="13127" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="13572" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="14018" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="14463" y="0"></use><g transform="translate(14908,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="16884" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" 
id="MathJax-Element-151">P(w_1, ..., w_T) = \prod_{t=1}^TP(w_t | w_1, ... , w_{t-1})</script></p><div class="md-section-divider"></div><h3 data-anchor-id="4noy" id="n-gram-neural-model">N-gram neural model</h3><p data-anchor-id="pu2s">在计算语言学中,n-gram是一种重要的文本表示方法,表示一个文本中连续的n个项。基于具体的应用场景,每一项可以是一个字母、单词或者音节。 n-gram模型也是统计语言模型中的一种重要方法,用n-gram训练语言模型时,一般用每个n-gram的历史n-1个词语组成的内容来预测第n个词。</p><p data-anchor-id="06pk">Yoshua Bengio等科学家于2003年在著名论文 A Neural Probabilistic Language Model [<a href="#参考文献">1</a>] 中介绍了如何学习一个神经元网络表示的词向量模型。文中的神经概率语言模型(Neural Network Language Model,NNLM)通过一个线性映射和一个非线性隐层连接,同时学习了语言模型和词向量,即通过学习大量语料得到词语的向量表达,通过这些向量得到整个句子的概率。用这种方法学习语言模型可以克服维度灾难(curse of dimensionality),即训练和测试数据不同导致的模型不准。注意:由于“神经概率语言模型”说法较为泛泛,我们在这里不用其NNLM的本名,考虑到其具体做法,本文中称该模型为N-gram neural model。</p><p data-anchor-id="1924">我们在上文中已经讲到用条件概率建模语言模型,即一句话中第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-152-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-152">t</script>个词的概率和该句话的前<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-153-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 2084.9444444444443 719.103370696049" style="width: 4.865ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="583" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1584" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-153">t-1</script>个词相关。可实际上越远的词语其实对该词的影响越小,那么如果考虑一个n-gram, 每个词都只受其前面<code>n-1</code>个词的影响,则有:</p><div class="md-section-divider"></div><p data-anchor-id="4lor"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-154-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1750.4700829925036 21477.364603150127 2940.029001518732" style="width: 49.923ex; height: 6.834ex; vertical-align: -2.896ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><g transform="translate(1141,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2311" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="2756" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3201" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3646" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4092" y="0"></use><g transform="translate(4537,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5851" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="6519" y="0"></use><g transform="translate(7575,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-220F"></use><g transform="translate(23,-1062)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="551" y="1627"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50" x="9020" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="9772" y="0"></use><g transform="translate(10161,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="11233" y="0"></use><g transform="translate(11512,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" 
y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="13488" y="0"></use><g transform="translate(13933,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="15910" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="16355" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="16800" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="17245" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="17691" y="0"></use><g transform="translate(18136,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" 
x="21087" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-154">P(w_1, ..., w_T) = \prod_{t=n}^TP(w_t|w_{t-1}, w_{t-2}, ..., w_{t-n+1})</script></p><p data-anchor-id="j5b6">给定一些真实语料,这些语料中都是有意义的句子,N-gram模型的优化目标则是最大化目标函数:</p><div class="md-section-divider"></div><p data-anchor-id="lokt"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-155-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1363.5596853480245 16988.68114617269 2553.1186038742526" style="width: 39.498ex; height: 5.907ex; vertical-align: -2.896ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><g transform="translate(120,0)"><rect stroke="none" width="824" height="60" x="0" y="220"></rect><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="162" y="676"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="60" y="-686"></use></g><g transform="translate(1231,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="840" y="-1501"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="2842" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="3392" y="0"></use><g transform="translate(3782,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4854" y="0"></use><g transform="translate(5299,0)"><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="7276" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="7721" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="8166" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="8611" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="9056" y="0"></use><g transform="translate(9501,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3B" x="12453" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3B8" x="12898" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-29" x="13368" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="13979" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-52" x="14980" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="15740" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3B8" x="16129" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="16599" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-155">\frac{1}{T}\sum_t f(w_t, w_{t-1}, ..., w_{t-n+1};\theta) + R(\theta)</script></p><p data-anchor-id="gdcu">其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-156-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 10000.73670172825 1042.103370696049" style="width: 23.282ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="550" y="0"></use><g transform="translate(940,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2012" y="0"></use><g transform="translate(2457,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4433" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4878" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5324" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5769" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="6214" y="0"></use><g transform="translate(6659,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="9611" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-156">f(w_t, w_{t-1}, ..., w_{t-n+1})</script>表示根据历史n-1个词得到当前词<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-157-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 1072.119101398937 642.5886520702876" 
style="width: 2.432ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-157">w_t</script>的条件概率,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-158-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 2008 1042.103370696049" style="width: 4.633ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-52"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="759" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3B8" x="1149" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1618" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-158">R(\theta)</script>表示参数正则项。</p><p align="center" data-anchor-id="28o8">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/word2vec/image/nnlm.png" alt="N-gram神经网络模型结构图" width="500"><br>
图2. N-gram神经网络模型
</p><p data-anchor-id="sos1">图2展示了N-gram神经网络模型,从下往上看,该模型分为以下几个部分:</p><ul data-anchor-id="pgmt">
<li><p>对于每个样本,模型输入<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-116-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 6708.784266995979 692.7932335345324" style="width: 15.637ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2951" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3396" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3841" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4287" y="0"></use><g transform="translate(4732,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" 
y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-116">w_{t-n+1},...w_{t-1}</script>, 输出句子第t个词为字典中<code>|V|</code>个词的概率。</p>
<p>每个输入词<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-117-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 6708.784266995979 692.7932335345324" style="width: 15.637ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2951" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3396" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3841" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4287" y="0"></use><g transform="translate(4732,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" 
y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-117">w_{t-n+1},...w_{t-1}</script>首先通过映射矩阵映射到词向量<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-118-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 9787.78426699598 1042.103370696049" style="width: 22.703ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-43"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="760" y="0"></use><g transform="translate(1150,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="4101" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="4491" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4936" y="0"></use><use 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5381" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="5826" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-43" x="6271" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="7032" y="0"></use><g transform="translate(7421,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="9398" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-118">C(w_{t-n+1}),...C(w_{t-1})</script></p></li>
<li><p>然后所有词语的词向量连接成一个大向量,并经过一个非线性映射得到历史词语的隐层表示:</p>
<p><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-119-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -912.3620829925038 14125.85950420255 1183.4137683405283" style="width: 32.78ex; height: 2.78ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="758" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-55" x="1814" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="2582" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="2943" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="3473" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-68" x="4073" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="4650" y="0"></use><g transform="translate(5039,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3B8"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-54" x="663" y="583"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="6107" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="6901" y="0"></use><g transform="translate(7902,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="607" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-29" x="8786" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="9397" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57" x="10398" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="11447" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="12241" y="0"></use><g transform="translate(13242,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="607" y="-213"></use></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-119">g=Utanh(\theta^Tx + b_1) + Wx + b_2</script></p>
<p>其中,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-120-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 572.5 495.10337069604896" style="width: 1.274ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use></g></svg></span><script type="math/tex" id="MathJax-Element-120">x</script>为所有词语的词向量连接成的大向量,表示文本历史特征;<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-121-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -726.0516853480245 469.5 757.103370696049" style="width: 1.042ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3B8"></use></g></svg></span><script type="math/tex" id="MathJax-Element-121">\theta</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-122-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 767.5 747.103370696049" style="width: 1.737ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-55"></use></g></svg></span><script type="math/tex" id="MathJax-Element-122">U</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-123-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; 
display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 883.406943983867 885.8104774772355" style="width: 2.085ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="607" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-123">b_1</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-124-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 883.406943983867 885.8104774772355" style="width: 2.085ex; height: 2.085ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-62"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="607" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-124">b_2</script>和<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-125-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 1048.5 747.103370696049" style="width: 2.432ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-57"></use></g></svg></span><script type="math/tex" id="MathJax-Element-125">W</script>分别为词向量层到隐层连接的参数。<span class="MathJax_Preview"></span><span 
class="MathJax_SVG" id="MathJax-Element-126-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 480.5 689.103370696049" style="width: 1.158ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use></g></svg></span><script type="math/tex" id="MathJax-Element-126">g</script>表示未经归一化的所有输出单词概率,<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-127-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 821.8053928999522 689.103370696049" style="width: 1.853ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="675" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-127">g_i</script>表示未经归一化的字典中第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-128-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-128">i</script>个单词的输出概率。</p></li>
<li><p>根据softmax的定义,通过归一化<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-129-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 821.8053928999522 689.103370696049" style="width: 1.853ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="675" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-129">g_i</script>, 生成目标词<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-130-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 1072.119101398937 642.5886520702876" style="width: 2.432ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-130">w_t</script>的概率为:</p>
<p><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-131-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1435.388886338094 14331.771838110326 2784.5222294013847" style="width: 33.243ex; height: 6.486ex; vertical-align: -3.243ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-50"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="751" y="0"></use><g transform="translate(1141,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="2213" y="0"></use><g transform="translate(2491,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1013" y="-213"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="3662" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4107" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4552" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4997" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="5442" y="0"></use><g transform="translate(5887,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="8839" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="9506" y="0"></use><g transform="translate(10683,0)"><rect stroke="none" width="3528" height="60" x="0" y="220"></rect><g transform="translate(982,676)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(466,425)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use><g transform="translate(337,-171)"><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="716" y="-283"></use></g></g></g><g transform="translate(60,-1017)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ1-2211"></use><g transform="translate(1056,521)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="278" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1048" y="0"></use></g><use transform="scale(0.7071067811865476)" 
xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="1494" y="-430"></use><g transform="translate(2261,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65"></use><g transform="translate(466,288)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use><use transform="scale(0.5000000000000001)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="675" y="-342"></use></g></g></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-131">P(w_t | w_1, ..., w_{t-n+1}) = \frac{e^{g_{w_t}}}{\sum_i^{|V|} e^{g_i}}</script></p></li>
<li><p>整个网络的损失值(cost)为多类分类交叉熵,用公式表示为:</p>
<p><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-132-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1889.9765733157587 15959.067928333125 3108.5268698706354" style="width: 37.066ex; height: 7.181ex; vertical-align: -2.896ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4A"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="633" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-3B8" x="1023" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="1492" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="2159" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="3216" y="0"></use><g transform="translate(4161,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><g transform="translate(147,-1090)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="345" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1124" y="0"></use></g><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E" x="577" y="1627"></use></g><g transform="translate(5772,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJSZ2-2211"></use><g transform="translate(116,-1090)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-6B"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="521" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1300" y="0"></use></g><g transform="translate(253,1238)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="278" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1048" y="0"></use></g></g><g transform="translate(7383,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="706" y="499"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="693" y="-463"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6C" x="8342" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="8641" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67" x="9126" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="9607" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73" x="9996" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="10466" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="10951" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="11502" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="11863" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-61" x="12742" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="13271" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="13844" y="0"></use><g transform="translate(14233,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="680" y="499"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="675" y="-463"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="15180" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="15569" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-132">J(\theta) = -\sum_{i=1}^N\sum_{k=1}^{|V|}y_k^{i}log(softmax(g_k^i))</script> </p>
<p>其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-133-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -841.0848558677576 959.2561863887845 1196.6178355146712" style="width: 2.201ex; height: 2.78ex; vertical-align: -0.927ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="706" y="499"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="693" y="-463"></use></g></svg></span><script type="math/tex" id="MathJax-Element-133">y_k^i</script>表示第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-134-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -682.0516853480245 345.5 714.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69"></use></g></svg></span><script type="math/tex" id="MathJax-Element-134">i</script>个样本第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-135-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.0516853480245 521.5 747.103370696049" style="width: 1.158ex; height: 1.737ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-6B"></use></g></svg></span><script type="math/tex" id="MathJax-Element-135">k</script>类的真实标签(0或1),<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-136-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -841.0848558677576 5572.756186388784 1196.6178355146712" style="width: 12.973ex; height: 2.78ex; vertical-align: -0.927ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-73"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="469" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-66" x="955" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1505" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6D" x="1867" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-61" x="2745" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="3275" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-28" x="3847" y="0"></use><g transform="translate(4237,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-67"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-69" x="680" y="499"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6B" x="675" y="-463"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-29" x="5183" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-136">softmax(g_k^i)</script>表示第i个样本第k类softmax输出的概率。</p></li>
</ul><div class="md-section-divider"></div><h3 data-anchor-id="3q5r" id="continuous-bag-of-words-modelcbow">Continuous Bag-of-Words model(CBOW)</h3><p data-anchor-id="bp5b">CBOW模型通过一个词的上下文(各N个词)预测当前词。当N=2时,模型如下图所示:</p><p align="center" data-anchor-id="90s9">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/word2vec/image/cbow.png" alt="CBOW模型结构示意图" width="250"><br>
图3. CBOW模型
</p><p data-anchor-id="119r">具体来说,不考虑上下文的词语输入顺序,CBOW是用上下文词语的词向量的均值来预测当前词。即:</p><div class="md-section-divider"></div><p data-anchor-id="dhc9"><span class="MathJax_Preview"></span><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-137-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1280.5596853480245 15974.423587035015 1987.5623706960491" style="width: 37.066ex; height: 4.633ex; vertical-align: -1.737ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="433" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="919" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1519" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="1881" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="2347" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="2920" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="3559" y="0"></use><g transform="translate(4735,0)"><rect stroke="none" width="11118" height="60" x="0" y="220"></rect><g transform="translate(60,676)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-31" x="1140" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="2054" y="0"></use><g transform="translate(3055,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="5110" y="0"></use><g transform="translate(6110,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="8165" y="0"></use><g transform="translate(9166,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><g transform="translate(572,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32" x="1140" y="0"></use></g></g></g><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMAIN-34" x="5309" y="-686"></use></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-137">context = \frac{x_{t-1} + x_{t-2} + x_{t+1} + x_{t+2}}{4}</script></p><p data-anchor-id="lgwh">其中<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-138-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -463.0516853480245 928.1191013989369 641.5886520702876" style="width: 2.201ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="809" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-138">x_t</script>为第<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-139-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -647.0516853480245 361.5 679.103370696049" style="width: 0.811ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use></g></svg></span><script type="math/tex" id="MathJax-Element-139">t</script>个词的词向量,分类分数(score)向量 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-140-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 6796.5 747.103370696049" style="width: 15.753ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" 
fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-7A"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="746" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-55" x="1802" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2217" x="2792" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-63" x="3515" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6F" x="3948" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="4434" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="5034" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-65" x="5396" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-78" x="5862" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="6435" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-140">z=U*context</script>,最终的分类<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-141-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 497.5 690.103370696049" style="width: 1.158ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-79"></use></g></svg></span><script type="math/tex" id="MathJax-Element-141">y</script>采用softmax,损失函数采用多类分类交叉熵。</p><div class="md-section-divider"></div><h3 data-anchor-id="wsii" id="skip-gram-model">Skip-gram model</h3><p 
data-anchor-id="h50z">CBOW的好处是对上下文词语的分布在词向量上进行了平滑,去掉了噪声,因此在小数据集上很有效。而Skip-gram的方法中,用一个词预测其上下文,得到了当前词上下文的很多样本,因此可用于更大的数据集。</p><p align="center" data-anchor-id="er4j">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/word2vec/image/skipgram.png" alt="Skip-gram模型结构示意图" width="250"><br>
图4. Skip-gram模型
</p><p data-anchor-id="jvq7">如上图所示,Skip-gram模型的具体做法是,将一个词的词向量映射到<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-142-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 1101 719.103370696049" style="width: 2.548ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="500" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-142">2n</script>个词的词向量(<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-143-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 1101 719.103370696049" style="width: 2.548ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="500" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-143">2n</script>表示当前输入词的前后各<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-144-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 600.5 496.10337069604896" style="width: 1.39ex; height: 1.158ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" 
xlink:href="#MJMATHI-6E"></use></g></svg></span><script type="math/tex" id="MathJax-Element-144">n</script>个词),然后分别通过softmax得到这<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-145-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 1101 719.103370696049" style="width: 2.548ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-32"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="500" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-145">2n</script>个词的分类损失值之和。</p><div class="md-section-divider"></div><h2 data-anchor-id="c7x8" id="数据准备">数据准备</h2><div class="md-section-divider"></div><h3 data-anchor-id="wd82" id="数据介绍与下载">数据介绍与下载</h3><p data-anchor-id="lr97">本教程使用Penn Tree Bank (PTB)数据集。PTB数据集较小,训练速度快,应用于Mikolov的公开语言模型训练工具[<a href="#参考文献">2</a>]中。其统计情况如下:</p><p align="center" data-anchor-id="gy7r">
</p><table data-anchor-id="5chj" class="table table-striped-white table-bordered">
<tbody><tr>
<td>训练数据</td>
<td>验证数据</td>
<td>测试数据</td>
</tr>
<tr>
<td>ptb.train.txt</td>
<td>ptb.valid.txt</td>
<td>ptb.test.txt</td>
</tr>
<tr>
<td>42068句</td>
<td>3370句</td>
<td>3761句</td>
</tr>
</tbody></table><p></p><p data-anchor-id="yk3g">执行以下命令,可下载该数据集,并分别将训练数据和验证数据输入<code>train.list</code>和<code>test.list</code>文件中,供PaddlePaddle训练时使用。</p><pre data-anchor-id="mb8r"><code>./data/getdata.sh
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="w42y" id="提供数据给paddlepaddle">提供数据给PaddlePaddle</h3><ol data-anchor-id="st0b">
<li><p>使用initializer函数进行dataprovider的初始化,包括字典的建立(build_dict函数中)和PaddlePaddle输入字段的格式定义。注意:这里N为n-gram模型中的<code>n</code>, 本章代码中,定义<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-176-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -704.0516853480245 2723.0555555555557 747.103370696049" style="width: 6.371ex; height: 1.737ex; vertical-align: -0.232ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-4E"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-3D" x="1166" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-35" x="2222" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-176">N=5</script>, 表示在PaddlePaddle训练时,每条数据的前4个词用来预测第5个词。大家也可以根据自己的数据和需求自行调整N,但调整的同时要在模型配置文件中加入/减少相应输入字段。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">from</span><span class="pln"> paddle</span><span class="pun">.</span><span class="pln">trainer</span><span class="pun">.</span><span class="typ">PyDataProvider2</span><span class="pln"> </span><span class="kwd">import</span><span class="pln"> </span><span class="pun">*</span></code></li><li class="L1"><code class="language-python"><span class="kwd">import</span><span class="pln"> collections</span></code></li><li class="L2"><code class="language-python"><span class="kwd">import</span><span class="pln"> logging</span></code></li><li class="L3"><code class="language-python"><span class="kwd">import</span><span class="pln"> pdb</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="pln">logging</span><span class="pun">.</span><span class="pln">basicConfig</span><span class="pun">(</span></code></li><li class="L6"><code class="language-python"><span class="pln"> format</span><span class="pun">=</span><span class="str">'[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s'</span><span class="pun">,</span><span class="pln"> </span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln">logger </span><span class="pun">=</span><span class="pln"> logging</span><span class="pun">.</span><span class="pln">getLogger</span><span class="pun">(</span><span class="str">'paddle'</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"><span class="pln">logger</span><span class="pun">.</span><span class="pln">setLevel</span><span class="pun">(</span><span class="pln">logging</span><span class="pun">.</span><span class="pln">INFO</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"><span 
class="pln">N </span><span class="pun">=</span><span class="pln"> </span><span class="lit">5</span><span class="pln"> </span><span class="com"># Ngram</span></code></li><li class="L1"><code class="language-python"><span class="pln">cutoff </span><span class="pun">=</span><span class="pln"> </span><span class="lit">50</span><span class="pln"> </span><span class="com"># select words with frequency &gt; cutoff to dictionary</span></code></li><li class="L2"><code class="language-python"><span class="kwd">def</span><span class="pln"> build_dict</span><span class="pun">(</span><span class="pln">ftrain</span><span class="pun">,</span><span class="pln"> fdict</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln"> sentences </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[]</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">ftrain</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> fin</span><span class="pun">:</span></code></li><li class="L5"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> line </span><span class="kwd">in</span><span class="pln"> fin</span><span class="pun">:</span></code></li><li class="L6"><code class="language-python"><span class="pln"> line </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="str">'&lt;s&gt;'</span><span class="pun">]</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> line</span><span class="pun">.</span><span class="pln">strip</span><span class="pun">().</span><span class="pln">split</span><span class="pun">()</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> </span><span class="pun">[</span><span 
class="str">'&lt;e&gt;'</span><span class="pun">]</span></code></li><li class="L7"><code class="language-python"><span class="pln"> sentences </span><span class="pun">+=</span><span class="pln"> line</span></code></li><li class="L8"><code class="language-python"><span class="pln"> wordfreq </span><span class="pun">=</span><span class="pln"> collections</span><span class="pun">.</span><span class="typ">Counter</span><span class="pun">(</span><span class="pln">sentences</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln"> wordfreq </span><span class="pun">=</span><span class="pln"> filter</span><span class="pun">(</span><span class="kwd">lambda</span><span class="pln"> x</span><span class="pun">:</span><span class="pln"> x</span><span class="pun">[</span><span class="lit">1</span><span class="pun">]</span><span class="pln"> </span><span class="pun">&gt;</span><span class="pln"> cutoff</span><span class="pun">,</span><span class="pln"> wordfreq</span><span class="pun">.</span><span class="pln">items</span><span class="pun">())</span></code></li><li class="L0"><code class="language-python"><span class="pln"> dictionary </span><span class="pun">=</span><span class="pln"> sorted</span><span class="pun">(</span><span class="pln">wordfreq</span><span class="pun">,</span><span class="pln"> key </span><span class="pun">=</span><span class="pln"> </span><span class="kwd">lambda</span><span class="pln"> x</span><span class="pun">:</span><span class="pln"> </span><span class="pun">(-</span><span class="pln">x</span><span class="pun">[</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> x</span><span class="pun">[</span><span class="lit">0</span><span class="pun">]))</span></code></li><li class="L1"><code class="language-python"><span class="pln"> words</span><span class="pun">,</span><span class="pln"> _ </span><span class="pun">=</span><span class="pln"> list</span><span class="pun">(</span><span 
class="pln">zip</span><span class="pun">(*</span><span class="pln">dictionary</span><span class="pun">))</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> word </span><span class="kwd">in</span><span class="pln"> words</span><span class="pun">:</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="kwd">print</span><span class="pln"> </span><span class="pun">&gt;&gt;</span><span class="pln"> fdict</span><span class="pun">,</span><span class="pln"> word</span></code></li><li class="L4"><code class="language-python"><span class="pln"> word_idx </span><span class="pun">=</span><span class="pln"> dict</span><span class="pun">(</span><span class="pln">zip</span><span class="pun">(</span><span class="pln">words</span><span class="pun">,</span><span class="pln"> xrange</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">words</span><span class="pun">))))</span></code></li><li class="L5"><code class="language-python"><span class="pln"> logger</span><span class="pun">.</span><span class="pln">info</span><span class="pun">(</span><span class="str">"Dictionary size=%s"</span><span class="pln"> </span><span class="pun">%</span><span class="pln">len</span><span class="pun">(</span><span class="pln">words</span><span class="pun">))</span></code></li><li class="L6"><code class="language-python"><span class="pln"> </span><span class="kwd">return</span><span class="pln"> word_idx</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="kwd">def</span><span class="pln"> initializer</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> srcText</span><span class="pun">,</span><span class="pln"> dictfile</span><span class="pun">,</span><span class="pln"> </span><span 
class="pun">**</span><span class="pln">xargs</span><span class="pun">):</span></code></li><li class="L9"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">dictfile</span><span class="pun">,</span><span class="pln"> </span><span class="str">'w'</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> fdict</span><span class="pun">:</span></code></li><li class="L0"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">dicts </span><span class="pun">=</span><span class="pln"> build_dict</span><span class="pun">(</span><span class="pln">srcText</span><span class="pun">,</span><span class="pln"> fdict</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln"> input_types </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[]</span></code></li><li class="L2"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> i </span><span class="kwd">in</span><span class="pln"> xrange</span><span class="pun">(</span><span class="pln">N</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln"> input_types</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">integer_value</span><span class="pun">(</span><span class="pln">len</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">dicts</span><span class="pun">)))</span></code></li><li class="L4"><code class="language-python"><span class="pln"> settings</span><span class="pun">.</span><span class="pln">input_types </span><span class="pun">=</span><span class="pln"> input_types</span></code></li></ol></pre></li>
<li><p>使用process函数将数据逐一提供给PaddlePaddle。具体来说,将每句话前面补上N-1个开始符号 <code>&lt;s&gt;</code>, 末尾补上一个结束符号 <code>&lt;e&gt;</code>,然后以N为窗口大小,从头到尾每次向右滑动窗口并生成一条数据。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="lit">@provider</span><span class="pun">(</span><span class="pln">init_hook</span><span class="pun">=</span><span class="pln">initializer</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="kwd">def</span><span class="pln"> process</span><span class="pun">(</span><span class="pln">settings</span><span class="pun">,</span><span class="pln"> filename</span><span class="pun">):</span></code></li><li class="L2"><code class="language-python"><span class="pln"> UNKID </span><span class="pun">=</span><span class="pln"> settings</span><span class="pun">.</span><span class="pln">dicts</span><span class="pun">[</span><span class="str">'&lt;unk&gt;'</span><span class="pun">]</span></code></li><li class="L3"><code class="language-python"><span class="pln"> </span><span class="kwd">with</span><span class="pln"> open</span><span class="pun">(</span><span class="pln">filename</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">as</span><span class="pln"> fin</span><span class="pun">:</span></code></li><li class="L4"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> line </span><span class="kwd">in</span><span class="pln"> fin</span><span class="pun">:</span></code></li><li class="L5"><code class="language-python"><span class="pln"> line </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="str">'&lt;s&gt;'</span><span class="pun">]*(</span><span class="pln">N</span><span class="pun">-</span><span class="lit">1</span><span class="pun">)</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> line</span><span class="pun">.</span><span class="pln">strip</span><span class="pun">().</span><span class="pln">split</span><span class="pun">()</span><span class="pln"> 
</span><span class="pun">+</span><span class="pln"> </span><span class="pun">[</span><span class="str">'&lt;e&gt;'</span><span class="pun">]</span></code></li><li class="L6"><code class="language-python"><span class="pln"> line </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">settings</span><span class="pun">.</span><span class="pln">dicts</span><span class="pun">.</span><span class="pln">get</span><span class="pun">(</span><span class="pln">w</span><span class="pun">,</span><span class="pln"> UNKID</span><span class="pun">)</span><span class="pln"> </span><span class="kwd">for</span><span class="pln"> w </span><span class="kwd">in</span><span class="pln"> line</span><span class="pun">]</span></code></li><li class="L7"><code class="language-python"><span class="pln"> </span><span class="kwd">for</span><span class="pln"> i </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="pln">N</span><span class="pun">,</span><span class="pln"> len</span><span class="pun">(</span><span class="pln">line</span><span class="pun">)</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> </span><span class="lit">1</span><span class="pun">):</span></code></li><li class="L8"><code class="language-python"><span class="pln"> </span><span class="kwd">yield</span><span class="pln"> line</span><span class="pun">[</span><span class="pln">i</span><span class="pun">-</span><span class="pln">N</span><span class="pun">:</span><span class="pln"> i</span><span class="pun">]</span></code></li></ol></pre>
<p>如"I have a dream" 一句提供了5条数据:</p>
<blockquote class="white-blockquote">
<p><code>&lt;s&gt; &lt;s&gt; &lt;s&gt; &lt;s&gt; I</code><br> <br>
<code>&lt;s&gt; &lt;s&gt; &lt;s&gt; I have</code><br> <br>
<code>&lt;s&gt; &lt;s&gt; I have a</code><br> <br>
<code>&lt;s&gt; I have a dream</code><br> <br>
<code>I have a dream &lt;e&gt;</code><br></p>
</blockquote></li>
</ol><div class="md-section-divider"></div><h2 data-anchor-id="zswv" id="模型配置说明">模型配置说明</h2><div class="md-section-divider"></div><h3 data-anchor-id="nsvl" id="数据定义">数据定义</h3><p data-anchor-id="f0ft">通过<code>define_py_data_sources2</code>函数从dataprovider中读入数据,其中args指定了训练文本(srcText)和词汇表(dictfile)。</p><pre data-anchor-id="5itu"><code>from paddle.trainer_config_helpers import *
import math
args = {'srcText': 'data/simple-examples/data/ptb.train.txt',
'dictfile': 'data/vocabulary.txt'}
define_py_data_sources2(
train_list="data/train.list",
test_list="data/test.list",
module="dataprovider",
obj="process",
args=args)
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="r5c0" id="算法配置">算法配置</h3><p data-anchor-id="lu7d">在这里,我们指定了模型的训练参数, L2正则项系数、学习率和batch size。</p><pre data-anchor-id="tav4"><code>settings(
batch_size=100, regularization=L2Regularization(8e-4), learning_rate=3e-3)
</code></pre><div class="md-section-divider"></div><h3 data-anchor-id="vp4s" id="模型结构">模型结构</h3><p data-anchor-id="kt63">本配置的模型结构如下图所示:</p><p align="center" data-anchor-id="f73k">
<img src="https://raw.githubusercontent.com/PaddlePaddle/book/develop/word2vec/image/ngram.png" alt="N-gram神经网络模型结构示意图" width="400"><br>
图5. 模型配置中的N-gram神经网络模型
</p><ol data-anchor-id="blyd">
<li><p>定义参数维度和数据输入。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">dictsize </span><span class="pun">=</span><span class="pln"> </span><span class="lit">1953</span><span class="pln"> </span><span class="com"># 字典大小</span></code></li><li class="L1"><code class="language-python"><span class="pln">embsize </span><span class="pun">=</span><span class="pln"> </span><span class="lit">32</span><span class="pln"> </span><span class="com"># 词向量维度</span></code></li><li class="L2"><code class="language-python"><span class="pln">hiddensize </span><span class="pun">=</span><span class="pln"> </span><span class="lit">256</span><span class="pln"> </span><span class="com"># 隐层维度</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln">firstword </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name </span><span class="pun">=</span><span class="pln"> </span><span class="str">"firstw"</span><span class="pun">,</span><span class="pln"> size </span><span class="pun">=</span><span class="pln"> dictsize</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln">secondword </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name </span><span class="pun">=</span><span class="pln"> </span><span class="str">"secondw"</span><span class="pun">,</span><span class="pln"> size </span><span class="pun">=</span><span class="pln"> dictsize</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"><span class="pln">thirdword </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name </span><span class="pun">=</span><span class="pln"> </span><span class="str">"thirdw"</span><span 
class="pun">,</span><span class="pln"> size </span><span class="pun">=</span><span class="pln"> dictsize</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln">fourthword </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name </span><span class="pun">=</span><span class="pln"> </span><span class="str">"fourthw"</span><span class="pun">,</span><span class="pln"> size </span><span class="pun">=</span><span class="pln"> dictsize</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"><span class="pln">nextword </span><span class="pun">=</span><span class="pln"> data_layer</span><span class="pun">(</span><span class="pln">name </span><span class="pun">=</span><span class="pln"> </span><span class="str">"fifthw"</span><span class="pun">,</span><span class="pln"> size </span><span class="pun">=</span><span class="pln"> dictsize</span><span class="pun">)</span></code></li></ol></pre></li>
<li><p><span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-177-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 1072.119101398937 642.5886520702876" style="width: 2.432ex; height: 1.506ex; vertical-align: -0.463ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74" x="1013" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-177">w_t</script>之前的<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-178-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -687.0516853480245 2323.9444444444443 719.103370696049" style="width: 5.444ex; height: 1.622ex; vertical-align: -0.116ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="822" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1823" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-178">n-1</script>个词 <span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-179-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.0516853480245 6708.784266995979 692.7932335345324" style="width: 15.637ex; height: 1.622ex; vertical-align: -0.579ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" 
transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-6E" x="1140" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2B" x="1740" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2519" y="0"></use></g><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="2951" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3396" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="3841" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2E" x="4287" y="0"></use><g transform="translate(4732,0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-77"></use><g transform="translate(716,-150)"><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1140" y="0"></use></g></g></g></svg></span><script type="math/tex" id="MathJax-Element-179">w_{t-n+1},...w_{t-1}</script>,通过<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-180-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 3377.9444444444443 1041.103370696049" style="width: 7.876ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="278" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1048" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-D7" x="1548" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-44" x="2549" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-180">|V|\times D</script>的矩阵映射到D维词向量(本例中取D=32)。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> wordemb</span><span class="pun">(</span><span class="pln">inlayer</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln"> wordemb </span><span class="pun">=</span><span class="pln"> table_projection</span><span class="pun">(</span></code></li><li class="L2"><code class="language-python"><span class="pln"> input </span><span class="pun">=</span><span class="pln"> inlayer</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> size </span><span class="pun">=</span><span class="pln"> embsize</span><span class="pun">,</span></code></li><li class="L4"><code class="language-python"><span class="pln"> param_attr</span><span class="pun">=</span><span class="typ">ParamAttr</span><span class="pun">(</span><span class="pln">name </span><span class="pun">=</span><span class="pln"> </span><span class="str">"_proj"</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> initial_std</span><span class="pun">=</span><span class="lit">0.001</span><span class="pun">,</span><span class="pln"> </span><span class="com"># 参数初始化标准差</span></code></li><li class="L6"><code class="language-python"><span class="pln"> l2_rate</span><span class="pun">=</span><span class="pln"> </span><span class="lit">0</span><span class="pun">,))</span><span class="pln"> </span><span class="com"># 词向量不需要稀疏化,因此其l2_rate设为0</span></code></li><li class="L7"><code class="language-python"><span class="kwd">return</span><span class="pln"> wordemb</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="typ">Efirst</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> wordemb</span><span 
class="pun">(</span><span class="pln">firstword</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="typ">Esecond</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> wordemb</span><span class="pun">(</span><span class="pln">secondword</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="typ">Ethird</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> wordemb</span><span class="pun">(</span><span class="pln">thirdword</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="typ">Efourth</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> wordemb</span><span class="pun">(</span><span class="pln">fourthword</span><span class="pun">)</span></code></li></ol></pre></li>
<li><p>接着,将这n-1个词向量经过concat_layer连接成一个大向量作为历史文本特征。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">contextemb </span><span class="pun">=</span><span class="pln"> concat_layer</span><span class="pun">(</span><span class="pln">input </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="typ">Efirst</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Esecond</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Ethird</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Efourth</span><span class="pun">])</span></code></li></ol></pre></li>
<li><p>然后,将历史文本特征经过一个全连接得到文本隐层特征。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">hidden1 </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> input </span><span class="pun">=</span><span class="pln"> contextemb</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> size </span><span class="pun">=</span><span class="pln"> hiddensize</span><span class="pun">,</span></code></li><li class="L3"><code class="language-python"><span class="pln"> act </span><span class="pun">=</span><span class="pln"> </span><span class="typ">SigmoidActivation</span><span class="pun">(),</span></code></li><li class="L4"><code class="language-python"><span class="pln"> layer_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ExtraAttr</span><span class="pun">(</span><span class="pln">drop_rate</span><span class="pun">=</span><span class="lit">0.5</span><span class="pun">),</span></code></li><li class="L5"><code class="language-python"><span class="pln"> bias_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParamAttr</span><span class="pun">(</span><span class="pln">learning_rate </span><span class="pun">=</span><span class="pln"> </span><span class="lit">2</span><span class="pun">),</span></code></li><li class="L6"><code class="language-python"><span class="pln"> param_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParamAttr</span><span class="pun">(</span></code></li><li class="L7"><code class="language-python"><span class="pln"> initial_std </span><span class="pun">=</span><span class="pln"> </span><span class="lit">1.</span><span class="pun">/</span><span class="pln">math</span><span class="pun">.</span><span class="pln">sqrt</span><span class="pun">(</span><span 
class="pln">embsize</span><span class="pun">*</span><span class="lit">8</span><span class="pun">),</span></code></li><li class="L8"><code class="language-python"><span class="pln"> learning_rate </span><span class="pun">=</span><span class="pln"> </span><span class="lit">1</span><span class="pun">))</span></code></li></ol></pre></li>
<li><p>最后,将文本隐层特征,再经过一个全连接,映射成一个<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-181-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 1326.5 1041.103370696049" style="width: 3.127ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMATHI-56" x="278" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-7C" x="1048" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-181">|V|</script>维向量,同时通过softmax归一化得到这<code>|V|</code>个词的生成概率。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="com"># use context embedding to predict nextword</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">predictword </span><span class="pun">=</span><span class="pln"> fc_layer</span><span class="pun">(</span></code></li><li class="L4"><code class="language-python"><span class="pln"> input </span><span class="pun">=</span><span class="pln"> hidden1</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln"> size </span><span class="pun">=</span><span class="pln"> dictsize</span><span class="pun">,</span></code></li><li class="L6"><code class="language-python"><span class="pln"> bias_attr </span><span class="pun">=</span><span class="pln"> </span><span class="typ">ParamAttr</span><span class="pun">(</span><span class="pln">learning_rate </span><span class="pun">=</span><span class="pln"> </span><span class="lit">2</span><span class="pun">),</span></code></li><li class="L7"><code class="language-python"><span class="pln"> act </span><span class="pun">=</span><span class="pln"> </span><span class="typ">SoftmaxActivation</span><span class="pun">())</span></code></li></ol></pre></li>
<li><p>网络的损失函数为多分类交叉熵,可直接调用<code>classification_cost</code>函数。</p>
<pre class="prettyprint linenums prettyprinted"><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">cost </span><span class="pun">=</span><span class="pln"> classification_cost</span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln"> input </span><span class="pun">=</span><span class="pln"> predictword</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln"> label </span><span class="pun">=</span><span class="pln"> nextword</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="com"># network input and output</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln">outputs</span><span class="pun">(</span><span class="pln">cost</span><span class="pun">)</span></code></li></ol></pre></li>
</ol><div class="md-section-divider"></div><h2 data-anchor-id="dy9a" id="训练模型">训练模型</h2><p data-anchor-id="q1d9">模型训练命令为<code>./train.sh</code>。脚本内容如下,其中指定了总共需要执行30个pass。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="xsww"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">paddle train \</span></code></li><li class="L1"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">config ngram</span><span class="pun">.</span><span class="pln">py \</span></code></li><li class="L2"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">use_gpu</span><span class="pun">=</span><span class="lit">1</span><span class="pln"> \</span></code></li><li class="L3"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">dot_period</span><span class="pun">=</span><span class="lit">100</span><span class="pln"> \</span></code></li><li class="L4"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">log_period</span><span class="pun">=</span><span class="lit">3000</span><span class="pln"> \</span></code></li><li class="L5"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">test_period</span><span class="pun">=</span><span class="lit">0</span><span class="pln"> \</span></code></li><li class="L6"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">save_dir</span><span class="pun">=</span><span class="pln">model \</span></code></li><li class="L7"><code class="language-bash"><span class="pln"> </span><span class="pun">--</span><span class="pln">num_passes</span><span class="pun">=</span><span class="lit">30</span></code></li></ol></pre><p data-anchor-id="wz3h">一个pass的训练日志如下所示:</p><div class="md-section-divider"></div><pre 
class="prettyprint linenums prettyprinted" data-anchor-id="q4ci"><ol class="linenums"><li class="L0"><code class="language-text"><span class="pun">.............................</span></code></li><li class="L1"><code class="language-text"><span class="pln">I1222 </span><span class="lit">09</span><span class="pun">:</span><span class="lit">27</span><span class="pun">:</span><span class="lit">16.477841</span><span class="pln"> </span><span class="lit">12590</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">162</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">3000</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">300000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">5.36135</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">5.36135</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.818653</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> </span><span class="kwd">class</span></code></li><li class="L2"><code class="language-text"><span class="pln">ification_error_evaluator</span><span class="pun">=</span><span class="lit">0.818653</span><span class="pln"> </span></code></li><li class="L3"><code class="language-text"><span class="pun">.............................</span></code></li><li class="L4"><code class="language-text"><span class="pln">I1222 </span><span class="lit">09</span><span class="pun">:</span><span class="lit">27</span><span class="pun">:</span><span class="lit">22.416700</span><span class="pln"> </span><span 
class="lit">12590</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">162</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">6000</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">600000</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">5.29301</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">5.22467</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.814542</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> </span><span class="kwd">class</span></code></li><li class="L5"><code class="language-text"><span class="pln">ification_error_evaluator</span><span class="pun">=</span><span class="lit">0.81043</span><span class="pln"> </span></code></li><li class="L6"><code class="language-text"><span class="pun">.............................</span></code></li><li class="L7"><code class="language-text"><span class="pln">I1222 </span><span class="lit">09</span><span class="pun">:</span><span class="lit">27</span><span class="pun">:</span><span class="lit">28.343756</span><span class="pln"> </span><span class="lit">12590</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">162</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">9000</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">900000</span><span 
class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">5.22494</span><span class="pln"> </span><span class="typ">CurrentCost</span><span class="pun">=</span><span class="lit">5.08876</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.810088</span><span class="pln"> </span><span class="typ">CurrentEval</span><span class="pun">:</span><span class="pln"> </span><span class="kwd">class</span></code></li><li class="L8"><code class="language-text"><span class="pln">ification_error_evaluator</span><span class="pun">=</span><span class="lit">0.80118</span><span class="pln"> </span></code></li><li class="L9"><code class="language-text"><span class="pun">..</span><span class="pln">I1222 </span><span class="lit">09</span><span class="pun">:</span><span class="lit">27</span><span class="pun">:</span><span class="lit">29.128582</span><span class="pln"> </span><span class="lit">12590</span><span class="pln"> </span><span class="typ">TrainerInternal</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">179</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Pass</span><span class="pun">=</span><span class="lit">0</span><span class="pln"> </span><span class="typ">Batch</span><span class="pun">=</span><span class="lit">9296</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">929600</span><span class="pln"> </span><span class="typ">AvgCost</span><span class="pun">=</span><span class="lit">5.21786</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.809647</span><span class="pln"> </span></code></li><li class="L0"><code class="language-text"><span class="pln">I1222 
</span><span class="lit">09</span><span class="pun">:</span><span class="lit">27</span><span class="pun">:</span><span class="lit">29.627616</span><span class="pln"> </span><span class="lit">12590</span><span class="pln"> </span><span class="typ">Tester</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">111</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Test</span><span class="pln"> samples</span><span class="pun">=</span><span class="lit">73760</span><span class="pln"> cost</span><span class="pun">=</span><span class="lit">4.9594</span><span class="pln"> </span><span class="typ">Eval</span><span class="pun">:</span><span class="pln"> classification_error_evaluator</span><span class="pun">=</span><span class="lit">0.79676</span><span class="pln"> </span></code></li><li class="L1"><code class="language-text"><span class="pln">I1222 </span><span class="lit">09</span><span class="pun">:</span><span class="lit">27</span><span class="pun">:</span><span class="lit">29.627713</span><span class="pln"> </span><span class="lit">12590</span><span class="pln"> </span><span class="typ">GradientMachine</span><span class="pun">.</span><span class="pln">cpp</span><span class="pun">:</span><span class="lit">112</span><span class="pun">]</span><span class="pln"> </span><span class="typ">Saving</span><span class="pln"> parameters to model</span><span class="pun">/</span><span class="kwd">pass</span><span class="pun">-</span><span class="lit">00000</span></code></li></ol></pre><p data-anchor-id="c8a8">经过30个pass,我们将得到平均错误率为classification_error_evaluator=0.735611。</p><div class="md-section-divider"></div><h2 data-anchor-id="yxh4" id="应用模型">应用模型</h2><p data-anchor-id="yl22">训练模型后,我们可以加载模型参数,用训练出来的词向量初始化其他模型,也可以将模型参数从二进制格式转换成文本格式进行后续应用。</p><div class="md-section-divider"></div><h3 data-anchor-id="cs4d" id="初始化其他模型">初始化其他模型</h3><p data-anchor-id="6o6n">训练好的模型参数可以用来初始化其他模型。具体方法如下: <br>
在PaddlePaddle 训练命令行中,用<code>--init_model_path</code> 来定义初始化模型的位置,用<code>--load_missing_parameter_strategy</code>指定除了词向量以外的新模型其他参数的初始化策略。注意,新模型需要和原模型共享被初始化参数的参数名。</p><div class="md-section-divider"></div><h3 data-anchor-id="30rz" id="查看词向量">查看词向量</h3><p data-anchor-id="6uwn">PaddlePaddle训练出来的参数为二进制格式,存储在对应训练pass的文件夹下。这里我们提供了文件<code>format_convert.py</code>用来互转PaddlePaddle训练结果的二进制文件和文本格式特征文件。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="5lc2"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python format_convert</span><span class="pun">.</span><span class="pln">py </span><span class="pun">--</span><span class="pln">b2t </span><span class="pun">-</span><span class="pln">i INPUT </span><span class="pun">-</span><span class="pln">o OUTPUT </span><span class="pun">-</span><span class="pln">d DIM</span></code></li></ol></pre><p data-anchor-id="lloy">其中,INPUT是输入的(二进制)词向量模型名称,OUTPUT是输出的文本模型名称,DIM是词向量参数维度。</p><p data-anchor-id="whak">用法如:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="dxfg"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python format_convert</span><span class="pun">.</span><span class="pln">py </span><span class="pun">--</span><span class="pln">b2t </span><span class="pun">-</span><span class="pln">i model</span><span class="pun">/</span><span class="pln">pass</span><span class="pun">-</span><span class="lit">00029</span><span class="pun">/</span><span class="pln">_proj </span><span class="pun">-</span><span class="pln">o model</span><span class="pun">/</span><span class="pln">pass</span><span class="pun">-</span><span class="lit">00029</span><span class="pun">/</span><span class="pln">_proj</span><span class="pun">.</span><span class="pln">txt </span><span class="pun">-</span><span class="pln">d </span><span class="lit">32</span></code></li></ol></pre><p 
data-anchor-id="5lbk">转换后得到的文本文件如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="x602"><ol class="linenums"><li class="L0"><code class="language-text"><span class="lit">0</span><span class="pun">,</span><span class="lit">4</span><span class="pun">,</span><span class="lit">62496</span></code></li><li class="L1"><code class="language-text"><span class="pun">-</span><span class="lit">0.7444070</span><span class="pun">,-</span><span class="lit">0.1846171</span><span class="pun">,-</span><span class="lit">1.5771370</span><span class="pun">,</span><span class="lit">0.7070392</span><span class="pun">,</span><span class="lit">2.1963732</span><span class="pun">,-</span><span class="lit">0.0091410</span><span class="pun">,</span><span class="pln"> </span><span class="pun">......</span></code></li><li class="L2"><code class="language-text"><span class="pun">-</span><span class="lit">0.0721337</span><span class="pun">,-</span><span class="lit">0.2429973</span><span class="pun">,-</span><span class="lit">0.0606297</span><span class="pun">,</span><span class="lit">0.1882059</span><span class="pun">,-</span><span class="lit">0.2072131</span><span class="pun">,-</span><span class="lit">0.7661019</span><span class="pun">,</span><span class="pln"> </span><span class="pun">......</span></code></li><li class="L3"><code class="language-text"><span class="pun">......</span></code></li></ol></pre><p data-anchor-id="bwj3">其中,第一行是PaddlePaddle 输出文件的格式说明,包含3个属性:<br> <br>
1) PaddlePaddle的版本号,本例中为0;<br> <br>
2) 浮点数占用的字节数,本例中为4;<br> <br>
3) 总计的参数个数, 本例中为62496(即1953*32);<br> <br>
第二行及之后的每一行都按顺序表示字典里一个词的特征,用逗号分隔。</p><div class="md-section-divider"></div><h3 data-anchor-id="erwq" id="修改词向量">修改词向量</h3><p data-anchor-id="0r1s">我们可以对词向量进行修改,并转换成PaddlePaddle参数二进制格式,方法: </p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="ap7e"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python format_convert</span><span class="pun">.</span><span class="pln">py </span><span class="pun">--</span><span class="pln">t2b </span><span class="pun">-</span><span class="pln">i INPUT </span><span class="pun">-</span><span class="pln">o OUTPUT</span></code></li></ol></pre><p data-anchor-id="gekd">其中,INPUT是输入的文本词向量模型名称,OUTPUT是输出的二进制词向量模型名称。</p><p data-anchor-id="u3bh">输入的文本格式如下(注意,不包含上面二进制转文本后第一行的格式说明):</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="oi6u"><ol class="linenums"><li class="L0"><code class="language-text"><span class="pun">-</span><span class="lit">0.7444070</span><span class="pun">,-</span><span class="lit">0.1846171</span><span class="pun">,-</span><span class="lit">1.5771370</span><span class="pun">,</span><span class="lit">0.7070392</span><span class="pun">,</span><span class="lit">2.1963732</span><span class="pun">,-</span><span class="lit">0.0091410</span><span class="pun">,</span><span class="pln"> </span><span class="pun">......</span></code></li><li class="L1"><code class="language-text"><span class="pun">-</span><span class="lit">0.0721337</span><span class="pun">,-</span><span class="lit">0.2429973</span><span class="pun">,-</span><span class="lit">0.0606297</span><span class="pun">,</span><span class="lit">0.1882059</span><span class="pun">,-</span><span class="lit">0.2072131</span><span class="pun">,-</span><span class="lit">0.7661019</span><span class="pun">,</span><span class="pln"> </span><span class="pun">......</span></code></li><li class="L2"><code class="language-text"><span 
class="pun">......</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="nhld" id="计算词语之间的余弦距离">计算词语之间的余弦距离</h3><p data-anchor-id="xy8a">两个向量之间的距离可以用余弦值来表示,余弦值在<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-48-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -771.0516853480245 2781.666666666667 1042.103370696049" style="width: 6.486ex; height: 2.432ex; vertical-align: -0.695ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5B"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2212" x="278" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="1057" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-2C" x="1557" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-31" x="2002" y="0"></use><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#MJMAIN-5D" x="2503" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-48">[-1,1]</script>的区间内,向量间余弦值越大,其距离越近。这里我们在<code>calculate_dis.py</code>中实现不同词语的距离度量。 <br>
用法如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="gro8"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python calculate_dis</span><span class="pun">.</span><span class="pln">py VOCABULARY EMBEDDINGLAYER</span></code></li></ol></pre><p data-anchor-id="76pm">其中,<code>VOCABULARY</code>是字典,<code>EMBEDDINGLAYER</code>是词向量模型,示例如下:</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="oz8x"><ol class="linenums"><li class="L0"><code class="language-bash"><span class="pln">python calculate_dis</span><span class="pun">.</span><span class="pln">py data</span><span class="pun">/</span><span class="pln">vocabulary</span><span class="pun">.</span><span class="pln">txt model</span><span class="pun">/</span><span class="pln">pass</span><span class="pun">-</span><span class="lit">00029</span><span class="pun">/</span><span class="pln">_proj</span><span class="pun">.</span><span class="pln">txt</span></code></li></ol></pre><div class="md-section-divider"></div><h2 data-anchor-id="dwdp" id="总结">总结</h2><p data-anchor-id="n608">本章中,我们介绍了词向量、语言模型和词向量的关系、以及如何通过训练神经网络模型获得词向量。在信息检索中,我们可以根据向量间的余弦夹角,来判断query和文档关键词这二者间的相关性。在句法分析和语义分析中,训练好的词向量可以用来初始化模型,以得到更好的效果。在文档分类中,有了词向量之后,可以用聚类的方法将文档中同义词进行分组。希望大家在本章后能够自行运用词向量进行相关领域的研究。</p><div class="md-section-divider"></div><h2 data-anchor-id="y44m" id="参考文献">参考文献</h2><ol data-anchor-id="y9fc">
<li>Bengio Y, Ducharme R, Vincent P, et al. <a href="http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf" target="_blank">A neural probabilistic language model</a>[J]. Journal of Machine Learning Research, 2003, 3(Feb): 1137-1155.</li>
<li>Mikolov T, Kombrink S, Deoras A, et al. <a href="http://www.fit.vutbr.cz/~imikolov/rnnlm/rnnlm-demo.pdf" target="_blank">Rnnlm-recurrent neural network language modeling toolkit</a>[C]//Proc. of the 2011 ASRU Workshop. 2011: 196-201.</li>
<li>Mikolov T, Chen K, Corrado G, et al. <a href="https://arxiv.org/pdf/1301.3781.pdf" target="_blank">Efficient estimation of word representations in vector space</a>[J]. arXiv preprint arXiv:1301.3781, 2013.</li>
<li>Maaten L, Hinton G. <a href="https://lvdmaaten.github.io/publications/papers/JMLR_2008.pdf" target="_blank">Visualizing data using t-SNE</a>[J]. Journal of Machine Learning Research, 2008, 9(Nov): 2579-2605.</li>
<li><a href="https://en.wikipedia.org/wiki/Singular_value_decomposition" target="_blank">https://en.wikipedia.org/wiki/Singular_value_decomposition</a></li>
</ol><p data-anchor-id="t8qd"><br> <br>
<img src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" alt="知识共享许可协议"></p><p data-anchor-id="lof9">本教程由<a href="http://book.paddlepaddle.org" target="_blank">PaddlePaddle</a>创作,采用<a href="http://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">知识共享 署名-非商业性使用-相同方式共享 4.0 国际 许可协议</a>进行许可。</p></div>
</body>
</html>
......@@ -521,6 +521,7 @@ marked.setOptions({
code = code.replace(/&amp;/g, "&")
code = code.replace(/&gt;/g, ">")
code = code.replace(/&lt;/g, "<")
code = code.replace(/&nbsp;/g, " ")
return hljs.highlightAuto(code, [lang]).value;
}
});
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册