From b33217d8f04ddf8d7b22de0af239a84dbf3a5e24 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 27 May 2021 19:32:24 +0800 Subject: [PATCH] refactor(dnn/cuda): refactor cutlass kernel generator for deconv operation GitOrigin-RevId: 88e962a912dc0f31b8b31787a9ab7aa1b38d1264 --- ...> implicit_gemm_deconv_cutlass_wrapper.cuinl} | 2 +- ...d_identity_s8_16x128x16_16x128x16_1_nc4hw4.cu | Bin 0 -> 1716 bytes ...ad_identity_s8_16x128x16_16x64x16_2_nc4hw4.cu | Bin 0 -> 1714 bytes ...dgrad_identity_s8_16x64x8_16x64x8_2_nc4hw4.cu | Bin 0 -> 1708 bytes ...ad_identity_s8_32x128x32_32x64x32_2_nc4hw4.cu | Bin 0 -> 1715 bytes ...ad_identity_s8_64x128x32_64x32x32_2_nc4hw4.cu | Bin 0 -> 1715 bytes ...gemm_dp4a_ncdiv4hw4_16x128x16_16x128x16_id.cu | Bin 1829 -> 0 bytes ..._gemm_dp4a_ncdiv4hw4_16x128x16_16x64x16_id.cu | Bin 1828 -> 0 bytes ...cit_gemm_dp4a_ncdiv4hw4_16x64x8_16x64x8_id.cu | Bin 1825 -> 0 bytes ..._gemm_dp4a_ncdiv4hw4_32x128x32_32x64x32_id.cu | Bin 1829 -> 0 bytes ..._gemm_dp4a_ncdiv4hw4_64x128x32_64x32x32_id.cu | Bin 1829 -> 0 bytes 11 files changed, 1 insertion(+), 1 deletion(-) rename dnn/src/cuda/convolution/backward_data/{int8/deconv_int8_implicit_gemm_cutlass_wrapper.cuinl => implicit_gemm_deconv_cutlass_wrapper.cuinl} (97%) create mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_16x128x16_16x128x16_1_nc4hw4.cu create mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_16x128x16_16x64x16_2_nc4hw4.cu create mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_16x64x8_16x64x8_2_nc4hw4.cu create mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_32x128x32_32x64x32_2_nc4hw4.cu create mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_64x128x32_64x32x32_2_nc4hw4.cu delete mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x128x16_16x128x16_id.cu delete mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x128x16_16x64x16_id.cu delete mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x64x8_16x64x8_id.cu delete mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_32x128x32_32x64x32_id.cu delete mode 100644 dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_64x128x32_64x32x32_id.cu diff --git a/dnn/src/cuda/convolution/backward_data/int8/deconv_int8_implicit_gemm_cutlass_wrapper.cuinl b/dnn/src/cuda/convolution/backward_data/implicit_gemm_deconv_cutlass_wrapper.cuinl similarity index 97% rename from dnn/src/cuda/convolution/backward_data/int8/deconv_int8_implicit_gemm_cutlass_wrapper.cuinl rename to dnn/src/cuda/convolution/backward_data/implicit_gemm_deconv_cutlass_wrapper.cuinl index f5382f919..349b392ae 100644 --- a/dnn/src/cuda/convolution/backward_data/int8/deconv_int8_implicit_gemm_cutlass_wrapper.cuinl +++ b/dnn/src/cuda/convolution/backward_data/implicit_gemm_deconv_cutlass_wrapper.cuinl @@ -1,6 +1,6 @@ /** * \file - * dnn/src/cuda/conv_bias/int8/conv_bias_int8_implicit_gemm_cutlass_wrapper.cuinl + * dnn/src/cuda/convolution/backward_data/implicit_gemm_deconv_cutlass_wrapper.cuinl * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_16x128x16_16x128x16_1_nc4hw4.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_16x128x16_16x128x16_1_nc4hw4.cu new file mode 100644 index 0000000000000000000000000000000000000000..63d8b005d12b85d227605f38e1ce943ebd54a88e GIT binary patch literal 1716 zcmbtV+invv5PjdTFsX!~N{IqdRGCnzBqd1D7IiC?x0Pc%8!KMh@+FskJ>&Im(klw$ z#dhpDvvX$VOtK>}M{h1Krjx6y;`U;Cb6))JXT2U0rP7w8HL*&l66raj!As&?c4RCm zD}tuuF=B#%o%4bs6Wrppx8#hW&dFU+!EttCh$R*G+;$GYIBx~@CqxS3AQ*rMMP*fr}^7&E>Slw-Hu_1G) zN+QzK3FGvhSHu33hu8ubPhrg$#PCsn*hNq`ScXk9c0T^c6#R8jo8FyAZiLijfV%mG z<%D7U6&m`Sv7bV5V#m6g2}NQD1Fy=YdhRM(mkOalVpLy0NX_%#bE%21K~I^b9!fFH zq^3)Z$9wfmVsEF~-PU2&Ciz<2-%H+&U6o4Hq;}A>8sxD4n709}>f?@hTdvC>y)beM zl+Qkc)-umQo1hD{(t@FimrN<>anN&I>v7X6bbpZ)+~}lz9zBx6**gdTXga@_&?5tk zFC?!(zPY8xh!rqK>Yxw8d=nA|SA@;$)8WWi?}(ehemH94d<5Osh}}qdnNMTOTp9tO zpJTMvcIgbEsV=WMc+djB+TomX_%?2)=iB*9PO-lWX0y0!Y-M%DD#} ZRE0+{;o&EI-U&KPQ)Jp>gTXJ>|3QgP31>j;eVR#1ONq#zE00f@1O*pVq#mKekBCmMv3tt^Yw z7u+f?kxsn6*j165Flh?buYQGQAWCB&7aGvwIuBs=nG2Z>n`2 zq!&hRfbzu`P+I0WC=+CXmRc}W@scS8Ee>jqYdvfleI727es?lyo`+AAaPa{G0GiGp z6!gRZ<15K45N~ejDPjc-kviz3FyDoQu@zyn`gA-p);r>+upf^aJ0C&Y8nK@Uuk+d2 zGM6R*$mbNTv|TtuXsXLw4j#3@?`AZo9KMg+vZx34zhl@n_-*he_*;WUxlT;y6`nEE*SuDI>%NH*B`i|Eu-UQy?=BV%b% zQWy`1z$L{v=OqIZ-2An(bQy=%$z9OF34UUzrKRw~whq8JZzc05R7vVE8H7m9RNw+y z&aw^*oOW2Hmz4@$R=P8#Y=MdMoKsIbvNTFE>GMLAWzI#6%(rCnm8HfA`+)_iRV&LP z^@XrnD3IEDsu_Ny33jsdxjV}xFD&J_2<@fc#I0_B-S4gU>uIi8Z@%iapb%PE>IH}C zrm11GS=lT^MlhZ$~ujVTx|5 z;0d`E+Ew;yxc+{(9`w%J2Sa|WZzjWkgFOGr$J4_zO9YV3OCW05oRDiXjLkSfMG}#% zMuepEN!jl{If(KTGREg*PK_9J`)%m=+Hkfl#qNgx4{SrX4NYlx_X#6aRRq+`&#j=G zkgwRtUpW6MwV-xbl~bu{>|Eqjnxy4U%~~daUSd;kKPb!d-!oNFUxS|VO8t{^n5v2` z2pR5OGm1T(bi1w7tV1R%d4I1&J9bo(udX^3tdcQa=BL z9?QHyPogJaSxFA1D7e71k?$#sC@!*jS&fj4INYnYfeI5l6a-l?t zww+jZ3_Qp2Q73&6=9^SFup(?$za5T@dPm$8*TYdG=OgI8M(oDH%Y3%6+$H;?caC9I z*@ZKdC0$;-;6V%gYKC&E;oG<@^LkYOJ%??BUk7i3zcz%_H@W7T#sG;rsyy-NgEIGk VlO12)$EJAgZ>2KXkE< literal 0 HcmV?d00001 diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_32x128x32_32x64x32_2_nc4hw4.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_32x128x32_32x64x32_2_nc4hw4.cu new file mode 100644 index 0000000000000000000000000000000000000000..0cc081ec86f7ad01bed385d0fd59cac309eed32e GIT binary patch literal 1715 zcmb7F+in^$5PjdTaEVm45h>y#6uC4~3MrM^BvpvizO9UH77MR!`NAb%-|>1Ea;@mg z+Sqf(b7tlY*@;-7ch^_b$<0lEe>J_k%>Q(=P6vrXY0J@uSS3_}^a9b~C2=l0F_si1 zLDTUVF+sr2c|nm0Zu!<-a>h{W>~~PXadu{iB_;RVwvNC!Zw2*dL<-^{7=Q>xWndh& zoTVKa80k>0)>;NHwCc1>#!i$zcjviag(VCYuDtNOc+~H0yS?*mzZZAILEQ9mMF-2xpoI#q zxFw!5G~YF8Y!0=}LL`MIRbVv*^aW)I`JLek($ruuj3&uYyU#9S2Vgvf?`%m7AN6}}1Z8WZey>i(-p6aE!@)6@UGUjPO?LMV zxe-zq0m|l=mJ^2YS7_%;#(oOLi5=^5E)T@<={~d{Ay-%%HjLCtn+GO|2u}d!*7E(!QUE;D^IR* i(6nMfMl%J~~kV literal 0 HcmV?d00001 diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_64x128x32_64x32x32_2_nc4hw4.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/cutlass_simt_s8_idgrad_identity_s8_64x128x32_64x32x32_2_nc4hw4.cu new file mode 100644 index 0000000000000000000000000000000000000000..5eaa5d469f5c63686b53d679be09d077adcb0370 GIT binary patch literal 1715 zcmb7F+in^$5PjdTaEVm45h>y#6uC4~3MrM^BvpvizO9UH77MR!`NAb%-|>1Ea;@mg z+Sqf(b7tlY*@;-7ch^_b$<0lEe>J_k%>Q(=P6vrXY0J@uSS3_}^a9b~C2=l0F_si1 zLDTUVF+sr2c|nm0Zu!<-a>h{W>~~PXadu{iB_;RVwvNC!Zw2*dL<-^{7=Q>xWndh& zoTVKa80k>0)>;NHwCc1>#!i$zcjviag(VCYuDtNOcr+YryS?*mzZZA?UflF@MF-2xpoI#q zxFw!5G~YF8Y!0=}LL`MIRbVv*^aW)I`JLek($ruuj3&uYyU#9S2Vgvf?`%m79}Nd>1Z8WZey>i(-p6aE{oXN_UGUjPO?LMV zxe-zq0m|l=mJ^2YS7_%;#(oOLi5=^5E)T@<={~d{Ay-%%HjLCtn+GO|2u}d!*7E(!QUE;D^IR* i(6nMfMjP3_4o? literal 0 HcmV?d00001 diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x128x16_16x128x16_id.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x128x16_16x128x16_id.cu deleted file mode 100644 index d3d701ea716156562346f89547b09461fbb44aa0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1829 zcmbVN*-jfV6n)QExIwB~6o_amgiMMANt#w=sTd^sHgardV#VXJyd;FL@3m*i1c-p< z!EtP#yPb1y_QVW*xEfDJ*VpOYcyfE5{ypmS`zYgz8{#-a({=cfQqM?A3$;wAg4lGy zjY>tXr9IT^01%lf3`Z+slu#KeW{7$xiM5?QZAg|AG#L&N69nE_Cn(b1&Uf9F#u(}z z+^ZsG*?d)_5^Fv5TZ#V4s!)-Er4jWmw8}rQ7-P~}(E@VL$$((53hKB4g zp1>Nf21my|boAu}mg6SWdt&r|0e>kV#nVvk*YV)8hc@*YYaxrw^Bdg`msh74yO9XjW(i>li;g7DyiR`#!$;Eptlv#OK0(MB}$Bmp>vFHG~{e;B62`K zxl8h6sBT0(RYMOo{PxgO#(HSOLxpcFmnA59m|d z*dp!hQzI3~3);)AHCHfw(AG1I$CBq~Y1!tn&{T&u_i} diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x128x16_16x64x16_id.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x128x16_16x64x16_id.cu deleted file mode 100644 index 934d3264512000e1a6ee531a5dab297aa59e94b7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1828 zcmb7FTTdb}6n^JdbTQfN8kFc7Vk&E*z~W}TY{bNUo0PT;4V_NY3y8mdPdgU|m8d+F z()K&I?>mP*F+*Q&M&se_ZSpW0-(MvEj#}L=O1a{OIL^>?9eyO#Gm_9;Et9DrHd%0^ z5|L?X5A`|#M5=Pb(TW%)REqK$qTWejZEH^(l4b;r2Lr?efp^vkinO=$U3Y~shS~>@ zUU|#efhLAz+;P+X0LD5as5>AMB86Z8A{3RLan!bkc0(%NkV-DS6S?Y630*+oEMdfv zu2Al@%XrK+>|>gO3YVlj%b*Z3tz>10*4!LYFO+PzT0Xc!*JPc0H!&3btO|g!jE5LM zaAk9IJ9xT!eCnT^pKW1&2?^=#hW&ZEpA25ZhSu%IymIw0H=M9bnbXB&PBd>qLv|RC zVU1V)qh1Fc9iPI|t3!PxMt=y{JK4hhR=|m;q1x}m0og&D@{F{QdFuI{Zbr+?Q;c2A z3FAv(`+~7+p*S&vJevwdg2RC%neMmH8=pw817{rs+aJdX?CZ@;=ET*R52GMT#WVv6 zI>DInrJx++t!IVP%1JGa=;Ji_Dvk=~H>WYw;tJSpMD*5Kd|YZ0V`AtW<2wyGTRRaM zpkLfgvK~}7qF#!jgKBy^=p|zvwBez|*OrSC6g*6WwlE%S+H05^!U`o89^;eAN<2SH zUYa_N9Gr9zsH8fp%~wg?c1w$E6nNk>s-VWV}gQd(R=rFN&@dIWF>x>ev&12oN3i*Rw(IXi=eo7W6%(`8e^L`~iU0rr diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x64x8_16x64x8_id.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_16x64x8_16x64x8_id.cu deleted file mode 100644 index 28512b4914d68cb0b3c52d81fc3fbb82c91de502..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1825 zcmbVN+fExX5Pi>AxIwB~v=BiNLMBCmB&AikR0N5>tsL9gSn+x-UlPLCckI1n14Kac zuyJO6=623ZcEl8Yx*lB)Z*J22(dFHF`uCvI>!FM*ZiwRyO;+JYN{ne(B)u&m>_V^Izf^4cDC(Z>5QT7 z-h)@(a<-?5Avt&4bl-uo&Is!Eh=fcb8Gr~yrDq&j zaik}dJLxeVYYpdErl7*58P9WQL@X;^8lp8f`_v01yPb{?q0kjs72b^vMc?ZNU@W5% z<9n`bVQvPGKOY|ZM`x!Sm>)tydD~&XAMeJ4m$0FA+c7Uf-Omgs>_QfFKAsWH+t5%Q z#+UGnulfha2|7MX&`Fc%12OtPV80X);%R90+vo%xBxv26kruMZJipcLRC#5^*u{b{ zz5uGv8M_jS6Ei6CiBKd297vMueh0nsiQGB}mLNEOJ51r+9Hz1$t|@#N)lkW$ok$P~ z#*8ln6I%K8I}>_mGM$MJxN}< zI*c3~B?wef+Zy+EQP*BNq2&rl23j14)dTAZEegR<&NHSI#$m9-b7Mf)hH|DW7VG)# zZ;*EOrI8BM1*7Fwnk(2mXyY2jBgu2F!l3UWmcmx4Y6&`f7qa9%fTwZrKZ*~659qLs zKZ*rm1?W=?_ePx+f(1o39Rh9dqGe&`Rufti7aFk5SxXmx>U~~z>vQ;8q;bo)B5ZTi usL?y^-L1wr`m?pOdOx&W1#OHSY`;8p2=);izMgLTFVThW>~O`zwDS)*L2mm1 diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_32x128x32_32x64x32_id.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_32x128x32_32x64x32_id.cu deleted file mode 100644 index 3fdf28326b729d102a2fddf01efdb7fdf3724e96..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1829 zcmb7FTTk0C6n^io@QO5TC{R%lLZ(B4w(F*GnFtbli(K1HtvZfnJ1ync@7TFC1uCEq zse9t@+`jLe?ui-taxue&DAoQ3SyH5 zH!2aCmiADuJwT)?Hyo{qQ9`9CpCMW}Nv!ScX+zSCpviEEm>_V^Izf>x?0na|(iubD zgU3P@ma_v*49U3TruzYmbw*HkKqO=e$pAzsDhtL@*BaXMrSyC$xh$N>Rc}h@0upBl zBaZZha%VlpL#_TC$`n+%B;{EKjR<8WD?_yA=8zUb$!@1p_)zGYtnUxUBq*uc^m4h z!*~ME@M>_}kI+&7EJDZqCe=q`^oM|_CtJAR3OFgK@AkX!BM2LzO?$>#$UH6hoo;8# zD=Wq>)P(UBuzktcwNRXxVV+HeB0k_yl1vXe=#5XH*Fmrd!ST4Cz_~fhWKLXD`6vjY zQcOD$uM>t%Eeu2rF<26bRVWkqwh~Y}S z63@?)SFS*6CqVmYgg_>>vGHG5byG+uv|Izrz>DLseqcqRWiA-Xc*>N**bldVZVlz`d;@klYYF3Tz0=EXef3|< zBy9Rtgk6prITlVA?oMMI{Mp)By&zhyyf$J78!$^8f`tT!@8`SzPjs$3dt5Ow>-+~j Cq;ItV diff --git a/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_64x128x32_64x32x32_id.cu b/dnn/src/cuda/convolution/backward_data/int8/kimpl/deconv_int8_implicit_gemm_dp4a_ncdiv4hw4_64x128x32_64x32x32_id.cu deleted file mode 100644 index 20ba90c374eb433b189273f757dc8c466d972947..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1829 zcmb7FTTk0C6n^io@QO5TC{R%lLZ(B4w(F*GnFtbli(K1HtvZfnJ1ync@7TFC1uCEq zse9t@+`jLe?ui-taxue&DAoQ3SyH5 zH!2aCmiADuJwT)?Hyo{qQ9`9CpCMW}Nv!ScX+zSCpviEEm>_V^Izf>x?0na|(iubD zgU3P@ma_v*49U3TruzYmbw*HkKqO=e$pAzsDhtL@*BaXMrSyC$xh$N>Rc}h@0upBl zBaZZha%VlpL#_TC$`n+%B;{EKjR<8WD?_yA=8zUb$!@1p_)zGYtnUxUBq*uc^m4h z!*~ME@M>^+5}~91S%i-JO{$N?=nnyp`&+o*3OFgK@AkX!BM2LzO?$>#$UH6hoo;8# zD=Wq>)P(UBuzktcwNRXxVV+HeB0k_yl1vXe=#5XH*Fmrd!ST4Cz_~fhWKLXD`6vjY zQcOD$uM>t%Eeu2rF<26bRVWkqwh~Y}S z63@?)SFS)UCqVmYgg_>>vGHG5byG+uv|Izrz>DLseqcqRWiA-Xc*>N**bldVZVlz`d;@klYYF3Tz0=EXef3|< zBy9Rtgk6prITlVA?oMMI{Mp)By&zhyyf$J78!$^8f`tT!@8`SzPjs$3dt5Ow>-+~+ C=x@3J -- GitLab