N{K}K0=Qni1M?kwYgWWXfjlmi=*+Gf(&8K1gCq_aKIeovZ`vYS6m_F1Vdvk51vXLdfwSkP*EC^$xBw)G
zZx}@HazxLW!_UV9k~R1nbG4?xC@hes(TW%6Go^;DGr&r^oA~Q|(VsSF`jV+LVfpm8
zmv3-C4$?2~Fu#w9*5FR(^?sV%DvKt3q4aQ94VaCNSP%tsPU=0B%QCR#6g`J3N{7Zy
zfArEbV1e3AhBEvD=shPAlBbf4&UFpU*ndgUF|vL*VU<<%e;nAQ(UH
zAq&pFwPd6y)mr+x+4Tcn2q^W%7ap#hcX%XS+yj!p0oZjvE-UosCzz5fHzq1^t+!?Y
zcQV&jNBd0<>xeoHj{iMK*|#s%fX;Y;Y?YuHFKjmfb{q$~8HON)9na61MD-ZV(S7ni
zA*;`=oWJ>~gqNadILy>4eKZny(tF(5*n|6W$uWS{=-NNif9&>}Sn?DqQH!^4kpq;YS+NLn<&1~-ScH5P<4&m(I%u$6y6`s{syHsm*S
zuzCQIc*IT{EBB`Z`~C9?AGF9PG%}Ss=1Ud~U-2+0D_LXB(lkLB)^p#z=$d46M`UW1noZ^mM>$Op%g&3iY
z1H<7&%)Ceol~zyLgN0V#vCf^ut${pxuUvRG!2zpQG%L$z!ANZVe+2N}|Tb>
z#^1_a0*-3sZ{Pqs%uHL6;w>!38NX*FD8Fbb3snH>yPkF2V~$r82E
z=e!vMxN%ZP9rD}$6pL#|Qz7sMMI*QA1++vz;
zz$Y2N6G%thIQn1f{SXao1dT2b(qGjU%Ia&K^RKdas|7Ar#{I|!gOBfTHYI$ejk{Kb
z6MU^vq^CY%CE|A>jfM`Y9F8jyaCRb2iK@*?YYHHM7;_rhiFvu!frgO~tAsFMSJ
zW9;9tIt<^2>t&Jj&e_82bm$(?pb{=Zsia~P&b7l)Nch`D0>24K<3vO%i(3qXX&snmZ!^{{=`f088B~YbI
zCD2Kkze{QJ-zcYW?)NgpivBM92YTyf<`)~en^FiV^;iB(p;(Aw-e9q0EfGB<5kgUq
zP(#~)RlooJI&td0Big7{c*WO=r+Fp-Bc&E3|9iHWoMhv%n(!f;$_u0f;sEWEa{`(b
z-0!i2bi#-tvka^%j*Yx)Z+MOD?`#e*4siW~*G~7~d6X!rM>tP}$ctZ+SxWejNg#;4h!x$;&ZNH?Qfp&Y%Q8-WmRtj%Wn<590S99)K|_Jmqq@dj
zVY$@eqHHECiEu&6ySVQ$+T#JZMNG`fX_Evxs5-y?n6lLS^+pD~H}~J-rmIG(*VhED
z!7XbE@&QX)wHf^P)Y@awyQMWhbXvXc&s2T;$@>0gYqKb$;@G%GgrM%blsaEEG5k4+
zjrsCot9D$i1~m4E#)c0`7YKOwzEz-=+ISIJz#60*UJJOaDfNr}7DrUuXn0;uXBvLg
z+dwJrDU2YeI%
z7alw@0Fz{IS(yEqC!Fp5Q80}zQo78HL7^|(TJ^y?KrdTX5G^=oA}LH`wzXZxnJAZF
zScbKR0sbjO1%ul?50W11;P!``Ve1d1=9=CUbq(zoAJrK8*^)S@iCV?UH8yMDL|q3R
zdV!m$PK6Z4pcQLhVAHFdfixgmTw9jOorkCiw1DDYV?~nsn_FdR2;*;$2Jy0x5eFiC
zc>=kubJpEA(*J8!#;qXnf7JDg#J4t+I1cZVB%c$?*GJ6(RqVto8I7K*pfZ}%E&TR*
zzVuZJ$94sl_WbU=>RUqLGJ>3N_azh>D{S4FK>1&dj5bE>!JuruMVo_xzV=jVcQTZP
zpSZ$Xe~gVI8n9{D*Ei3?2-AHZNh=G8>)FVIF54b1P{JJc!}wX5pipkshREhm548ll
zVnnDLM6!4DMswj394
z@n|fr-~PYYh*k!|1KCdaT2f|c^RB+YD2u=mMG)%Kv
zJRvO8r1mk0z-MJXq2Hs~L)Sts8;z`qMH6+l?W;#MC?s)n-0SygW|jXhjyR*mG5-9IU+?7LY;tOPfjG4Nk6D)8}9z_AX$rKsn$
z@FNp)How43zpODZSE7>99-!!>9s!;*^V(VBD!b=LYzH<#=d82&`)r*mv&NWuoG<6)
zOX^M5vQ_?m!ot#+XyaOw$bQ`au16D0*}%tS&@qwxNBu30!X;@6AKj3hzLbHtYn0~Q
zu;Ai_;kez5w1GgS(4Z6s^;)@8^EVUCx7WZtvXivfy|5k6#4W3Z{Er2@=X71)UnM&a
zrl~lhWqJnP4o|y=2s^l|baO`cj~9hfc$C`yYi1jVct*eEciWe26&O_=;-MAus%=Q>
z@nSJK1PwKhM=|5;kPCG!hRIuHJ2z!+$G07CF8`c@N#}*`HYnHcm4&9hDJAM3vYqD1
zXL8x7q%`&e5^lR9qH>%;Tmu#`T{=6kd>7lLqsb-i6DHHPG(;K(&zo2IIt4-p%uggu$L>F)w|wnyaUy|iTVD3zd)
zbL1ngJ+5pUpy@k}mP9Qx{b2D~Im>U&?x(afMHfyjpWK|%JDFOf9gDyXo;7rc`Fu>t
zXVtBr*@`h|^jW_h%LZ;z=GcD9o{mymq*obgB+&xPXrsu<>JzA_
zb&p=Tl$`psnGmeNksI8yCFXIkFfaJ^D?Hj@aRQIhSl?_08EOOmzhMcOWrHT%96BU_;)a=$*#x&MIgea?OE+Yf%2bBxdVd_M2j`~7-7pU(&0jlKjr
z?WU9!Qab&mF+fa=F0itMU|N1VwT>AN?_PlTCZW@0TurzysmQOdT$nW!`Y?v#q3E}x
zTc!!#1@trtI{H3=U*EUwT%1(?{HpUFlj`~Mkte58RWF*>b6PKmH;kMvFuvSPd!%9&
zx~>r$GX7*ShljxwL_n?jENZ>vFL|qdXj#yh#g$$^t5Cb#n0X~;z18&)k|Q&@zb6=^
zFJ_R$D9FGge1a7vzTrjc;ZjS2YY<`6c_mhuy5${{Tq6H`0Orc$rgndLroy07YwC%5
zs>&|OQr7;Lp}8#2EdGPBlI~Po`2v$HjbMn=M-{gYGAtJ_RSLOLAKk#v6EJ$@B)$Th
zT7(cARMU)CX%j3Fev6=Z$(B^)G2!Rbg9v@g
zQk?x4;NNe%m0aDCiccvuEFJM)H-th6MHs+Cjclx$W;)w5zR!f94z!
zOZN&NuR#0yjMc>A&oa5qbiW&0cn9exh%Q?RUFP$p5u^>$sgM6dBh5*LK(CQjlhR;Z
zHY_}mjX%oh6S!ya-voV(XRq4x+v^n9ce&E*XyO9j-Xuh4i_?e%F=)0Npim?KL
zCY$!xabTEfw(&Bed0{Ey13uS$p)r>?(Y&p;r1O)vtveacaBhP#no+`aZZsP&F-2MI
zbG=L>^TlhCqQ(3uG*n@ku4hATghRb_Kx$=Fb7aQ#&>(feoFG0LrI`-Bq)H`Hg|ig|r|k#W1b>$1>+*piHP%hj63m9gKieOXyA+dnOBu
zl~_zeo-x=nH844zr!ehYpuzhdP{S>ucDxVxl^ImeBxYDCb#4fF*5}ZvPkTU;#eU;(Obnv=L2pMr1m5qM#Vfu>??4_<;L_Kl`%M5
zAQ=)|b#MDE#BQeJ#yW7AS(cT-Z^(YYDP)DS;-yS*kiq4o<`hZG_qXp+Nn-mv
z)^_sT^7M{36i#kl94^S|=DaD+r6x_mqus{aF%>XC+)qK>@H94woBh}oC&g-Zsp?oN
zEK0UJU((O{XTkjJ-=KbP0k7#UvwG#K$uzr9w-0hRH8i>x-9xrbX;oi_*vHi41*w6RUu=7m0)~+N6(a2l>AE)
z^!px|hA+6CK`q)3sTlI)f%>t=<U@p3qLT=lIU`S5$zFtq&5JJX+EK6w!b7bRE*NcOZjR2IG(ZuKde$2tux%Iu4Y%
z@+9p&GUsV)H^jUb4zp=Q0*Eahgjc`k+~I+*$yP5WJ{RaB6Pmulq=_(7pjnqUt^Mx
z@5)4LDF`xs(ac)u0Sm3bDoQd;;Jzbcj$3Sc;8_`K=@hV+pJux(PX?C5TyCsj36lI@
z2!Z!n78bKrg->~gvg=QJp)h-7#99rC5d=5}!d6;)Kr;jjYkr#%0#r!_A)>Zn8$^Y*
zZ68QdBKiE=VCBX^Q+4wi}oDNo=0
zBY1$hd;?@(fkmySD%f|Q2a$3{2?(QvwG?P4U5IN{dwr_2zQ%+#Mbhk3AOvTeJL6gi
zS934!od;43Pl+kP@EN~5Lb*D|A^n+(
zU1FbS2`CX?_K%|y5WTGAt#wUAHR2U>w(oGMM18nj{*67^Rsf%LRhoL1tISIVC4rD;WkR5G#`e
zMOFy{*mF&wjH-qKe545CD1Rw%Gpm6fundeeP4qfREU63tGTXEppnbtT@25godKqWy
zFoa-UsDtpr#kN!Q$Im8NQ!i4Ff-qA|2x*~l!8bg~>O5ax=9eQvTs>tJ
z;|a@oa?#Dzf$feLfr6a5d#q6cOEf5asHzxYc)7)I{z25$qaC5-wE
zJf@omi3&tm=jT|Ey77h#Y*H@(ft14A0pUG6U+{b$Z_h;dq4U-2wuP|Rl3-Q$0c)$i
zv`9aNl2s!@~@MYS0fAbjJ7CV`d>AWEn$}_^a05h9lPopl*iR
z0Zm9zMIdYQQmlPVpl}y%pF`KSR3JqH5S9o!^Sy2Ld0D0UHy2zw}WNWpJdXB!N~bz;c^ki?N$L)0IDR|2Bw5kk;ID4Xdu
zKa|V#dXjIB4i-Ba24ltAKi}fbt|YK508DSc{k3Xx2m~e9wb|+QCwM^QmR<0AtS|_B04K0J
zqZ+=DY3aNkGVeA98(Dzekf@~3TiBIkrkGQQ}c7+1dT(b1uMJ-
z81(kWKO>^n9Imu&xi=C<$==;>ukl)HY-l61oybPnvaTuvvA{9J@_-14^GNic@B{fQ
zdNUWQW)QFwkW@g28vf@GD$BR$nw
zyI7dQO=}e;-@}3}%ctAr@$tBIKqAe7tLqbpbbX)#kNG;lZ>giOg`1YHw5hk-SmVyL
zY43-H!uzyqElRv~N<_DN(^41taf}p-d8%x6=!M8qBejVG=vgXxoI{oF+fAmU*kzg=WA}v+S>&|E%&j1-A)`d|E2)BVzYw5O
zgQo%DfL6E@MU;pad6@b%^{wL=9hdcRYp&jeFwVG{&6xlieDKE96+kx1RKflljv3A*
zcG(qAe$P~C$vopenhX9GK7`;^uk2#;`7XdQo`w`$pKxHK>wRy1IM*D}xE}MUg=;8o790aJR$F4y^CAxlK8*xS;
z8nbVux2UDY6&wXM%BxG;=UB)n6qDk;{hqEE
z$+m!;J|Ktn(nFcu{M6k5cAi*F!)jKOX}xP3*Y%Mn3A!)o{^E$UBO&L#Y6G?mTy)2Y
zk#cZ@SO7G*`nZaBx~WqZ*n7m_`%U#w-tY#Fh^c?bjWKaL&W8W-G#KD6yNY!>VHAX%
zaBwi`t2x=yfd1DXV(kOjs2`@P`MfahBG7fQFXZaR-ao5em7?Df<#fs2M6r-VM(yab$VL@Us)pUu5oM>?Z~jr@jr{%bUm;YT8q_I~*K
zV48>D;n#`uZzL(*j{{f4i;Ci!!DB0>!RBrWhrg=wu84`_Kd~oIUefau8xg!=HlJ`|
zhD^-_xi-TWFQrr(<7aC;+@UXLN
zXWX~ZhSeQdT9r)v@M-K9YbOER)-t$Dy3%Z^0$~vr594y_KR-y$5-nlYvQrAw@6qWA
zgnUdrHoKDF>6JV{;lhYG*d7p?Ta2NDuZUGnwhFs0t9vEX)f>CoI~-~!DdJdGWhw-c
ztRi2KK~k
zFya$dL9`9t?+)q_NfxEBI6h)N9{6h#0Mt#k&>6y_xW=H5aCmI%^t2B3h=eE-ZCKd^
zn`D|eb7Oi*PdG#AGat2Gv%EKv;kq>+jWRx3$RwyiMw*zRSB$=F>nbZG%yY+4Q^McI
zN$AJ7FAmu6JY?PJ#4*4NKHKbpJEA|KQ}&^1{`|hlw008OhB5DZtzIwm#KA99G&AvN
z3`yffoBfiAxHwH0yJ+?9pG3+E9LF~JO_FGq&!k6VTb1$#BqpAE7F9OV8B#UNm?byQ
zUz=RyPi}AMDSVN!%;V_4;2r1wtg00$a>c>OAm`$LL`LLr*=PRyh*sunW12!>!~bEd
Me?_n8qJ7kV0aV@D-2eap
literal 0
HcmV?d00001
diff --git a/word2vec/image/ngram.png b/word2vec/image/ngram.png
new file mode 100644
index 0000000000000000000000000000000000000000..d137d1d8f8c3940d93ed758cd8a4c0937c495871
GIT binary patch
literal 9327
zcmeI2XH-*Nx9@Eaih?wyLnslkAOs>1DFKN>sDcU>qy@xK#E=J(5+b4ygoLVeA|Tk1
zB3&SWKte|dNCzb}A%LMIbk5fIoOhge-1Fg%JI)>BjyukWJ+iX)oNLWBv)6C`_uL7;
zVs6NLSp4vwJ$rbOMhL4td-f86L*ft@(4&yp9|e5u#abC&+>_TK`4jkZz*XN&f6t!6
zs3TjBe*xdQ9~j-l?%8v+{`aw$=JnQj&mQmrBtrk%BfI%D`&<_r+}NTJA0d{+&*!J7
zA9)4TW*}$w2zk1Z*X3BvYpZ)SD*?COZ|&Dk?@Ml&EqDt6ZMY{R~{tyScaW$
z6i~`KbS!w7+gxIAvifs(6^8kmL`p`()!N4Mx~tP|>uKvFK7BCX`GCIB=?)kk&SOd$6nHjwP5cEu^akoiq;h5f^%f8
z9^c$*x8IGczz~{(pu92r+V;`)EOeHuFTEo?&XJ8JT)#YnBX2EbUPkEA`H{AdRyK6$
zdK&k2Yu+9gFf(wjbM+d&;2_%68`>lVxc^mh{qpa&C(ur~S<-T-VvxUrm?OKS8ioH^
zF<=PiFl`q%nnV)rvNC9t(zEyjO@`F5YlmKT?ZO@H4Lm&c}t
z2eHN**VW%xknQ5z?z(oSrzLc~i8R=GF%-ALw^h!b?nt{GMDbsp-70E||Lrjx-l3kY
z?;xI_Y;PABH=2r|I}AS<$+xeeC=T#x2qzOvgL5EqVP-4BDnyfnb4rwvP~H4Hm8EN?
zl@6&5Z!}JJLCaZ9XF7N=4yFa~kBdo|p%CB9R;-OfY6<3P7fJxTXqnDbm|Eq7A73(G
zgR0F!P!q`*B}+RPmC^ckI<-aON&mTHXKZSrt^U+QmoJi64odF#i8N%3t-cL(Zt*RR
zU=J5hXd2pEQd!*{hB=i$Jc5v51HVjK}(*8|`3suDa5Sd{vlU?gQ_e+@5lfc-lol
z2aqLgx;)}eLZs8B-fjg&PUE_Z(G3rg)fK7fgkEf87acl&gHZ(|OS(Hf%}l?67*>s`
za{p?-5Jy>)-c-m5oDyG3n-jowd68g@D0NE^uAzLw-;(C@Ds{0zmm1~)Po@%j6fVT^
zLM*YF5zR>_?FYElq~lUcVX$9nP4mVr4SJR3?%*HMzkVj{2u$m5J2n$Kl|1+97UgRm
z4eC~KAotMJQ?G)R*#@_R*g0>|(br(+d49-LVCyn1DN
zx#YlTcPQgf{VSU{h!OarB?X?FYE4LrdW4j}fcS0ukxu4*&
z#7i2YGtLS6T}d`g>UZPbDp1Er$_`F#f2#4xsjUdDTbas;(y4|L(|22yXsJRfC>h#|
z3A3AG_|h{(@T>XvnwIICPJxp_Z(C{gg%6bWk-~J{sQi6u)KunCV4=AwGn27!X%QEp
zcVY+PeD5sD?EO0Z+4+`2sHvn)``aZc@1)3nd-S8QlAFGgwSIXQY-B3(RH=;EUCbC}
z=Lv|G5O*5VgiOBBaK66bU4(n$srC~Fek;f-`KiL^O?||N6(Dvyq$_k@bF@EnzTR2U
z5OQvP9blgn-p`~>IDdi&Bozcss|meWQfw7cjU*uNu%(*QNMrV~gTNtk*u@R{0L>1`{_;_9E-l6V;^LEE=)4?c*jP@bI$+Zq2n#ZCpS
zu2(r%c0n(HF9`3Lh-en(PM*-e=}~Vndn$@s?6RQmn-e{g4WADy=y?9DF}()sVyTgo
z72^aRXctG7DtXF~_G8k)wK69Q1~C(#tM2gmCtc<%pK>{SNj*FVvM(iZ20U9O7oCN*
zGL>JLV073`fM22izAGdo71yn@fe|zfd7*ITo%adFPiQESWUA8zQLu(iW=X)LPyS@Z
zHz5t^{HMM`5!P8~!m)eeav!>&h{FKgdV%&|$)Qbdzo7|$@H_xofTy3t0muTRya_J^%hd-;Fgy-rIy$O$d3yz@Q#AM~~24-G~*!%gVroDq<x=GfO#zEj|ff~X$D
zrM5*iqxeFpNrL+N0wtT*rC2t2ImuZfTo@bf`8I0ah`PRJ+{l_zEp>&*)DEPpV{9c+
z9wMInm!mn2zQ-;L6lUT{X%|RZ9bvd7wNjh+Wl-Mp@8pov2=mNV-=*Pa3BE(%%Yx45
zvdDqI*WfI5hOcGaT#n=)i}efa$q$Gz7wp_5V%zTO;U{4k-707yBpGMb=}U^xok{5Q
z+tdVQ=R^Zh9}nk?BP_-^ERR6KQ?;hoNcZA|d_8>Q)nx_$9?me-8JezKRf$P`YvL#h
zV=coZ1fKWmor_p9+F)vd;M>%~ZV={2YNjqpDdE)l(%&(9$08DqDlW+APrWdlhaAOC
z%##LluGxjl#6I$T7JZ%Fth*dkU*)z*Jbp-_k%E;Pa#lNioXh6JRkyRiFK!hxVHD3w
zn-K4uJ{48zDtHzNm2SUsS!WC#HQu4+RhGv)*%gW_;$2-|kgk93e~c=GL-;95T$EIX
z{|xJlFOu@?2hk9I9u_)29tOAt2{rEhC<&gY^I7ShpXyGXe+ERDBv-ej)?)M|{*o#f
z7W<%;BiVIMP}9e+@|!xw*BrLl?7QTsy}l;*BQ6#DiLaNGFrRgHgSJPp6&E%Gm#|UX
z$VjJriO)>`oE&rNd=8X1TV1hjpW55rQ$B*g?kbTjH2$j6GooN-@GiWO@o8(u_+2IP
zzheu2yG6fZjC;g>Y488~R$fou=^`kvKt=n1)@^{00|(zM0Mj%P6EXmI_7xFaEB;-y
z9RQg0Hl|ZAoeo-Rk{ZcPN2=mR8Df
zKO#NFMb6oZ23FIaVw%b;{FbLOUz1QK`^-@LZL9r#bh!b&r@q^kHq7s|IoFr3Y&`3t
znp$WU;*MJwu4pW4#{!ab;N8_X!eiEq{#ZSF{~j=jads6#@XqXOd3fCVK?6f^?a{2v
z@UUhnkY9yR?nlGfy1rqPBW@o(L}c?CI0ZDN51M)Cg55_3JdJ8ezPFi{L
z8ppLS?2|@Yc36}WeR(j|S?Tkwy?P4?WWQm{P$&3aR6GSGL24{K0mwpG)9;CIWfc^o
zd7Lt*!ljxOPKw7;vrQ6&$L`0qdOJ09XINWb1jRVFpd7VgWhRIHVsl>&gQkz+9(2^b
zpP+Tdst!y#H%$|+_ZX}sod3HKV@r1+3ltFyYGJ*juKaxCVl;7#+~t*L{V$j0gRwge
zN$SAXa+zFzx$W(lW&cVWe{jQ5f^7iHG=q&g`*s0KJ#gjrtXdsX6v+&pkZH^k{Tz
zzrscfcalFlEe?@ms_G%ZC4@X3dvJR>Z7GlmmY@{z1yOhSOoGH_1y)xK#)*mXNFu78
zl_4qhf|1Nc6P~sgENn(dF0ba^!Iqvl+U3=tRCmXHnG!Bv|8Dp
z-P~guw$pOJE*+0PD=EJ%Gia5-FC?Pxy^#cj=)i
zO$UA$DZ8u3nkTUHvO!?qN5rddQB3t@>QjVOYn-Y#`ZOJDYK#3r#ulJ+LarO%d
z>cx{=Ss)75t`I=E#aJ!dk6hf&ig;+C`5yh?EHi~+(D11$VtN>DF!Z>yxVVq0!rj|9MAila!^4rPE+uxZ&09X
zO8IgmLhlK;n~Of!Dz@RmTUYo>{Z1>zNL=#B>PmtTZ-JS4cM15WLd~u5=irtgMc0%-
zrlWRGLb%Gf;hl;*XR4(wN}}`KYb#P-O@gz~riZxa6@6NQj_ChlntL?E#-o;!H^o@jlNcXo`8SILXGCM1HMxF=R4N~*>10;*Q(47X9e
zrubC=fFp!I@Irh8gd-S$;VJ;fa9{1MhHj|cR-H+Nl>j%;ixA}!j0piwaRVAs=U=e*
zFP{JPY3PH+N_g%=1PA|p*`#;2_o4A%7gXHg%B0qn>>joeT{JYO>v2v-58L?vgXV9=
z*gm@PUB35o;c8LMQaEr+uq!c_vQMU}K#5#X;>b4(-CrM;UvHOk@Hw~OrN5PGr`_^K
zzpH=5^M>sBh4Nbd^(hKzZr}&~bcc=hjSNaO4a;
zu+yTgW?5}eX7*k^a0A5EY|VVyp#?4Iv~CwEc@8N1bh%pFVd7~bL%c9`Q&|#`UCbSl
znR{(i7o-Kh`C`J~ryNRjyeH0kPn>Zo)(`9?co%FJ6w3oSPKldYH-}N57$ticjGu6%
zs&~QV-pS7*sZ8Zq?+W||YV)n(3UNF@QShP1raXK{Z5aidEo7y!13NwY4b{8(JuOp3
z=KEa-%s(dNKzPiu_L`w2W0*#nA4ZQyw;^XMBH1uVY&7y^%mgwoFv_3e8rO7KsAZG(
z>Q?MSNiE!0jnLX4@5*`-5%+=?%9_BEwx7OxnyFlxO`^EX_x$*0rn1Vf=J&=l$)7U3
z3I3WoF+hRFRGlVhZLg(YAHpXU*WYMyZYzzK7laQvN>;R6l86*g1uMxj@j{gireqjdrTH8q<-2SNQs=_a$x9coEa#8q%=S+&OUk_qDI~`%Zk92)M
z8MW&nwE0T6^dyAFI6W&sUM^ftLp3XOdwY0*XqRi4XEt8*POgZ}FT2T+Z6sYivOcD5yHSGH$O(b&B8HL)Z;IC=insw
zZ=yi)(NPLSNk;GaAziIfQ(<>D?0B!!;a+hH%LcZ`_*KHdUyIvlaH{Ar)+~?{{HQjlul3tbA_z
zIBMP?3g^i#@!B1`^fB@o@G!4jwbspy!!LdG`x2=yHJ*1*#Nma&G}8@r;f4!h*w?Yd
zG6hxnseABP@@3bc#i*L|o9l+@+A7fox!R>^@PLKQqTc{sO1JY{>QH9Q=f
z`)}moIfF0kwUj^xxQ%8nWz^$R+%x$3(Aip~f_mmUVGPr+%|LL9b@Ue@Y=i#BqVxV}RYk6$HmT(bwUX$a!n2v7J-d8Z
z79`|T^+wU=Tv$m4es+9KmJ~*ow5U4x!X)uZT=f*Q>8wPY+A1
zQh$c=suy7xPCrIR_ah0cwNNtHQcx4AY3COAc-{0%Qv`ufsy
zCgQ1Sz((Xmn_DnjY!-`G@_f7<|3RihnazjM23I~pvw@-)G+@$xEyG2ki3_~h1ywg7
zY3*LG$psQ!Kj11}`20}Ju~sb~L64w`t2@@m!qYvm-r2Y$TeJ|ah}?g&IT<(MY~iOa
zJFD1QT>MT-qM*~mpSkO`J93ac@~rBI;qDSMxMp`bLb&WlmC63tp?l&f`gBqAb*I#t
zs#6~Z60RdtT{4wq>+Z_e@hnN{`b^5Fs8+-dpNg1h*Tr^sJdJ5=;cB^l2}n`RWZ|HFY8vzae?YbGo{@);iX&2-76Fgd13t
z)jGqwUY{3(2P*6X0@)K96fKQb&C45E$}O+W&FywX&d}?~+Qf2q@%bLR$W456FRUOJ
zxZt1y%~hor4;g(eZuMQm95jdPJ-GySZ$TDM%1a5JpX{N-xnW~c
z1<@O7k+P8P4aDCw3VbqDr?081%0^1-2vlyaGwJBQ8u>^W{*~NH?tb>2H
z?B+gy3R!!zc1S?I2cPA2MXT2fvnfiw)n^5Lb1nZ*)?}9fZyu5a&wF3t&_$ZnXwEZ#SLs&gQP)I@EEt)U*R9cr~1Dlz~Cm=yLw6~2ka
z9gM^Sk&Ts?p(8O~Ou&SlOl=THEj|99;QuG{{}(SC-}Ah^Igq#WhP}OUw|3v>jrOoJ
zW@nKabbf)Dd7!2%5OWtOPYa-dO7Q=A(>xb#TlEN2T2`j<=dHT4dZqVdOVwG;05)?=
z%QIWK;rK|EAJ$SeB@KqXD>nDRCjZ=!5nPg!a)MgS2s(dqqpjkGY4mqMjcuM27J0eY
zmv3($Z=@_|2|+#wK*2dcj86KcAklbGK^D_v%Q4`=_otXwo?3R$Q~?^51u@%>iWc8nq2pFWq`K6C#E
zB86-3iFbMjG|wr9YzzOl=atd#F
zK%9n{xgKNNGeFI&Rd+%i6&0{S&|RdKlkd@G${{z$zx;~xRuGyLJ6CJDP%A1kIr6k^
z$~UtC8Y`H|94v8PZaA(IUT&U|W%_jH_8?6M3Aa9ykyX3e9{!*&$!(avqlk5(r{1GG
zct3z7s@ST0zqnKlJ$UXe<#BdK?vrpsGQ?=&XhTTz#mpz=((wtc*p@_F!{DF+-8@#F
zn)5LIk~6JPF3Ych0sa&o$5*6d!6
z@B;CPUYsY*6*Sk&5V#F*EAWq{A?n{Y1}PTyBsi+vjdCh=6wO;8bzo@KIfaggg8XPc
zqL_r+#-ReCCUWAc-2I{92f?~1xTKS?hMqvH@F&Lyaer$l3X00oMA|3y> fo, newHead
+
+ bytes = 4 * int(paraDim)
+ format = "%df" % int(paraDim)
+ context = fi.read(bytes)
+ line = 0
+
+ while context:
+ numbers = struct.unpack(format, context)
+ lst = []
+ for i in numbers:
+ lst.append('%8.7f' % i)
+ print >> fo, ','.join(lst)
+ context = fi.read(bytes)
+ line += 1
+ fi.close()
+ fo.close()
+ print "binary2text finish, total", line, "lines"
+
def get_para_count(input):
    """
    Compute the total number of embedding parameters in input text file.
    input: the name of input text file; every line is expected to hold the
           same number of comma-separated values
    return: (number of lines) * (number of comma-separated fields on the
            first line), or 0 when the file is empty
    """
    with open(input) as f:
        first_line = f.readline()
        # readline() returns '' only at end of file. Without this guard an
        # empty file would be reported as holding one parameter, because
        # ''.split(",") yields [''].
        if not first_line:
            return 0
        # The dimension is taken from the first line only; the remaining
        # lines are merely counted, not parsed.
        para_dim = len(first_line.split(","))
        num_rows = 1 + sum(1 for _ in f)
    return num_rows * para_dim
+
def text2binary(input, output, paddle_head=True):
    """
    Convert a text parameter file of embedding model to be a binary file.
    input: the name of input text parameter file, for example:
           -0.7845433,1.1937413,-0.1704215,...
           0.0000909,0.0009465,-0.0008813,...
           ...
           the format is:
           1) it doesn't have filehead
           2) each line stores the same dimension of parameters,
              the separator is commas ','
    output: the name of output binary parameter file, the format is:
           1) the first 16 bytes is filehead:
              version(4 bytes), floatSize(4 bytes), paraCount(8 bytes)
           2) the next (paraCount * 4) bytes is parameters, each has 4 bytes
    paddle_head: accepted for interface compatibility; this function does
           not read it and always writes the filehead
    raises: ValueError if a field cannot be parsed as a float
    """
    # Header fields: version 0, floatSize 4, then the parameter count.
    # NOTE(review): "iil" uses native sizes, so the header is 16 bytes only
    # where a C long is 8 bytes (e.g. 64-bit Linux) -- confirm the target
    # platforms before relying on the layout described above.
    header = struct.pack("iil", 0, 4, get_para_count(input))

    count = 0
    # 'with' guarantees both files are closed even if a line fails to parse.
    with open(input, "r") as fi, open(output, "wb") as fo:
        fo.write(header)
        for line in fi:
            values = [float(field) for field in line.strip().split(",")]
            # One pack call per row; the bytes are the same as packing each
            # float with "f" and concatenating the results.
            fo.write(struct.pack("%df" % len(values), *values))
            count += 1
    # Single pre-formatted argument so the message is identical whether
    # print is a statement (Python 2) or a function (Python 3).
    print("text2binary finish, total %d lines" % count)
+
def main():
    """
    Main entry for running paraconvert.py

    Parses the command line and runs exactly one conversion:
      --b2t  calls binary2text(INPUT, OUTPUT, DIM)
      --t2b  calls text2binary(INPUT, OUTPUT)
    Invalid or incomplete command lines are reported through
    OptionParser.error(), which prints the usage text and exits with
    status 2.
    """
    usage = "usage: \n" \
            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
            "python %prog --t2b -i INPUT -o OUTPUT"
    parser = OptionParser(usage)
    parser.add_option("--b2t", action="store_true",
                      help="convert parameter file of embedding model from binary to text")
    parser.add_option("--t2b", action="store_true",
                      help="convert parameter file of embedding model from text to binary")
    parser.add_option("-i", action="store", dest="input",
                      help="input parameter file name")
    parser.add_option("-o", action="store", dest="output",
                      help="output parameter file name")
    parser.add_option("-d", action="store", dest="dim",
                      help="dimension of parameter")
    options, _ = parser.parse_args()

    # Exactly one direction must be chosen: with none the script used to do
    # nothing silently, and with both the second conversion would overwrite
    # the output of the first.
    if bool(options.b2t) == bool(options.t2b):
        parser.error("specify exactly one of --b2t or --t2b")
    if not options.input or not options.output:
        parser.error("both -i INPUT and -o OUTPUT are required")

    if options.b2t:
        # Check DIM here so the user gets a usage message instead of a
        # traceback from inside the conversion.
        if options.dim is None:
            parser.error("--b2t requires -d DIM")
        try:
            int(options.dim)
        except ValueError:
            parser.error("-d DIM must be an integer, got %r" % options.dim)
        # DIM is passed through unchanged (as the string given on the
        # command line), exactly as before.
        binary2text(options.input, options.output, options.dim)
    else:
        text2binary(options.input, options.output)


if __name__ == '__main__':
    main()
diff --git a/word2vec/train.sh b/word2vec/train.sh
new file mode 100644
index 0000000..3ddc482
--- /dev/null
+++ b/word2vec/train.sh
@@ -0,0 +1,10 @@
#!/bin/bash

# Run `paddle train` with the options listed below. Keeping them in an
# array puts one option per line without backslash continuations.
# NOTE(review): --use_gpu=1 presumably requires a GPU build of paddle on
# the host -- confirm before running on a CPU-only machine.
train_args=(
    --config Ngram.py
    --use_gpu=1
    --dot_period=100
    --log_period=3000
    --test_period=0
    --save_dir=model
    --num_passes=30
)

paddle train "${train_args[@]}"
--
GitLab