From 948ff63adda422f2ff68be2d32074819493b0fa3 Mon Sep 17 00:00:00 2001 From: yanzhenxiang2020 Date: Wed, 1 Apr 2020 17:48:30 +0800 Subject: [PATCH] fix mindrecord ut long time --- mindspore/mindrecord/tools/mnist_to_mr.py | 18 +++++++++--------- .../testMnistData/t10k-images-idx3-ubyte.gz | Bin 61261 -> 407 bytes .../testMnistData/t10k-labels-idx1-ubyte.gz | Bin 92 -> 74 bytes .../testMnistData/train-images-idx3-ubyte.gz | Bin 340460 -> 474 bytes .../testMnistData/train-labels-idx1-ubyte.gz | Bin 140 -> 82 bytes .../python/mindrecord/test_mindrecord_base.py | 10 +++++----- .../ut/python/mindrecord/test_mnist_to_mr.py | 12 ++++++------ 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/mindspore/mindrecord/tools/mnist_to_mr.py b/mindspore/mindrecord/tools/mnist_to_mr.py index 462ab7fb5..046788535 100644 --- a/mindspore/mindrecord/tools/mnist_to_mr.py +++ b/mindspore/mindrecord/tools/mnist_to_mr.py @@ -77,20 +77,20 @@ class MnistToMR: self.mnist_schema_json = {"label": {"type": "int64"}, "data": {"type": "bytes"}} - def _extract_images(self, filename, num_images): + def _extract_images(self, filename): """Extract the images into a 4D tensor [image index, y, x, channels].""" with gzip.open(filename) as bytestream: bytestream.read(16) - buf = bytestream.read(self.image_size * self.image_size * num_images * self.num_channels) + buf = bytestream.read() data = np.frombuffer(buf, dtype=np.uint8) - data = data.reshape(num_images, self.image_size, self.image_size, self.num_channels) + data = data.reshape(-1, self.image_size, self.image_size, self.num_channels) return data - def _extract_labels(self, filename, num_images): + def _extract_labels(self, filename): """Extract the labels into a vector of int64 label IDs.""" with gzip.open(filename) as bytestream: bytestream.read(8) - buf = bytestream.read(1 * num_images) + buf = bytestream.read() labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64) return labels @@ -101,8 +101,8 @@ class MnistToMR: Yields: data (dict of list): mnist data list which contains dict. """ - train_data = self._extract_images(self.train_data_filename_, 60000) - train_labels = self._extract_labels(self.train_labels_filename_, 60000) + train_data = self._extract_images(self.train_data_filename_) + train_labels = self._extract_labels(self.train_labels_filename_) for data, label in zip(train_data, train_labels): _, img = cv2.imencode(".jpeg", data) yield {"label": int(label), "data": img.tobytes()} @@ -114,8 +114,8 @@ class MnistToMR: Yields: data (dict of list): mnist data list which contains dict. """ - test_data = self._extract_images(self.test_data_filename_, 10000) - test_labels = self._extract_labels(self.test_labels_filename_, 10000) + test_data = self._extract_images(self.test_data_filename_) + test_labels = self._extract_labels(self.test_labels_filename_) for data, label in zip(test_data, test_labels): _, img = cv2.imencode(".jpeg", data) yield {"label": int(label), "data": img.tobytes()} diff --git a/tests/ut/data/mindrecord/testMnistData/t10k-images-idx3-ubyte.gz b/tests/ut/data/mindrecord/testMnistData/t10k-images-idx3-ubyte.gz index 9fdddeebe95e6e0a9790fdaf19532d46e83647c8..d7a2ea5de2afb0a2e2af780ea309418c83b786cf 100644 GIT binary patch literal 407 zcmV;I0cidoiwFo#VuW4*19UMkYb|MQVP|D?Eoo$UGc9#ud30p}?U=Do0x=ZEU&7+7 z#L3|{He*;B82uOAU~utoaAYHKB4J@@jDybp8P4wPXiPL=t`s8-^nE?PqsMXdm)tF1 z-}~P6+FpAiCk+v4F0KoA*_ep6nfRsi<^2mqzisdK$4{O{xikSS2eN;^1 zC=tiLi}>dei6)ArGN+e-eqpD4?E!rT_Ag+6ea6?K5o7&yn)ZOf`Sn0(FzKlsg*+4c zC^Tn(cuvh&edbb6*MYVv#b|9@+pXaFO zX?rh+|AQrVsvcPq6@eOm;Xy$xTrLD{P)J(4PZ{BS8)2cJlxyep`31tV?EJ7E000xe B!vFvP literal 61261 zcmeHQ{c{xM9mk0ps8cs0V+h8*Vk?pIazUk_EIBn+?N|vvSj9oI28@7#Icy-=nB<=N z(pfMG@eOQY;Ec4yRv^&~34+Ojmt4RM9s?#(0$lEnklcmjZtrrrx83LIPG>s(2ihsq z`TTN|TsAv%pLw3o+dbbqQ_a+=Q>>+hQ_{9B$}L&2adXl7;?e~hUy&YMux)L{*5WDG zCzkh=jkX3io;?~{DBd>r=*dUt=AY`k^uS%~^clB4zjD^JaObNXPds_=)K6!AAIn{N z^DT=rIv;&*{{7;&7h`)`zFIx%s@|{XRgPW`ys*5aG+N_JuUzCz*bhWY@=CK)W|H7E zpP~&}B4&Na_^n`w)sT9oj(?&*6W!&2izvQw%=XNK6-VOMh=kSwW)%aHW9a$4AP7GyCk+ozruQY zEpa8lD`eQR@t|@K3!EUOv2Yt{Z;$J3Yy%yz$%&i-GQywfjB!n`A z+Fj}VTk;pU7-(Q^V5fizK#Y(Hh$dVCJQ5ru{4)Xq;u69m(lOF8(lOF8(lOHU4VRAf zDcpIBa)j1VB5q)c0vv?YqU_oFvfC@m5 z&;|%2TmdW*Y~sH>a%DNk0&KC@5^gH%5!rZ5)Lh1~4N8q4F8C(JY}P=An)TwsrP?X& zveu&~=QZ}=4}b)V0owq~08WG&KpBEHh;#`T*HV z&yvmk%A`db=wY|>9p5-CPYACFm)@0s#a~>{^VwV$*XC`glPCWyr+4Q&9fze2zJ}^+ zo>S_3>Xv!@MQMY*LfV3V0b*D%*e{?9uqDI-S_!KFrv#%2pNyD*l!TCojEsr^4G9_& zG$d$9(2$@ZK|}ICYe<%#b1-FLN2K1>*i`ejP6h&sjds;oCY626`8ioe{~Gaoa|)yG zN9?vW+Qa(`$N+noUPydwrX8{IfO~{Zc9q={c6B?HloXFm=VWQsU3{yw%J-(6$M@9; zRgVkV-l7+!AG}3%>3mXJWe$g?sav+o#h#)Xc`+}I_=?(3J90W5v-#bBlo#-Q#sEpZ z9KDybXW(YQ3`+ys39JEfgrGn-VGm%b;2dF)5h{?K5D^ip(H@{AL9vAX44osYRW!(G z-EWLH7dl7>zv4vvg|T@w-xxd5NQ&7=`Rn08!PQTRZ*59cCcjcxa&;fAjO}%c%+7k} zZ6+zRSjYS|YFL~kY9L^IqZ)UvH+F<>SeVRQ5oBY^g!c?7R8Kl8gzLsfZS}7RH`mLL zRDGd_TJa~sX{WV^a0exLB1sO(%{F=7l*G12LWr5oSjQ&cCr@8&E% zPv{xxvku!muJF0;G-C{F#9MTNbf}?ppJ%xLyBxm5a?&xKD-pCSQap>3#c%SX(NSr) zG4drX?aa^Q2J}dsY?;Yfl(IZEq~FBvHYUP6F{i^NE!C#;e{T}ITZ9Qij%V}7r60!a z#>+V+#S=#(JLDi2YVLH*;*=%4&FS!b99mheKeB(Qp+23zT|L+3C}UMV4`*L%LcTRMd^bmO0X3k;ss@38K)}_IMaUu? z2+SUQ_5ZW4W=dILPPBFN>d5ck*>G=e*4(q(<2Sv3#}kL%D$aQ9q0jHj{dvpu_uu{H z_KoviKL3;LyE1Ox^`|SP?do?AJC;3ZU6-Xlc_96#`%T$D?2R4j>n1z?bH_Y^O?sr1fxWy_wfqXz+ z>C9hbRHuc)4E1Eb`#1bfXMPsf)tSassw<7h>YR$Dv$$em#+{sSy(|9?PMFwt)w2ON z0!COz*dm}2FeFq20tpiVe*_N+dyEKybc6(nl#K!bB@6l^bZn>(QL&;u{*N^5#81O= zV;)({t_In%U~#gaDQT>r`pfGi?)X^KEt^R4$HeGR8l7y@BK31^JPjzRMmvd{N5zz# zwy<&L_4md|ywUOryVgW~!}ZC%Oh1rp^Ec&@TeJ@?`Vu{=#2VEm=CS9ETH+TxY=kZ% zDUmkyY_0Z@{xBD#>MZal$-!A%i6?K zE@i}R+fTbKUvJafQx5xRv~4u`I*W6h*5iDb&Ayvb3x0wUXL&-f68fjts{SFXE+{%3^04=WBCHgGE~3OirkE&oVwFPhGuE z2<6&vG0?!;z)k@bfEXbY5KXuOcqBMR_-6zJ#3h7Bq+_IGq+_IGq+_Jx8#o=mIKXbp NzBH{YqoI7tKLE^TE}Z}X diff --git a/tests/ut/data/mindrecord/testMnistData/t10k-labels-idx1-ubyte.gz b/tests/ut/data/mindrecord/testMnistData/t10k-labels-idx1-ubyte.gz index c8a685166004730bc0367208b092155ee9423b76..6925ee8ce4a67a8c8e7b6ad9575968b22273b801 100644 GIT binary patch literal 74 zcmV-Q0JZ-giwFo#VuW4*19UMkYb|VHVr6V|Eoo$UF)ejsd30p}EylG003ZOsu;8%s g|E=gyp$E($lJ@0|a-?2sFZ!>s0J+@_B{cv50R43xyZ`_I literal 92 zcmV-i0HgmOiwFn+Z*yJ%19UMkYb|VHVr6V|Eoo$UF)ejsd30p}?ZLqjfFJ+^up%n> ya|`$1&^>xdl0A~|4Nz|Y^$wuk1nQkYy$h&!1N9bAZv*unpxzHgt&brXCjbEcw<5Cu diff --git a/tests/ut/data/mindrecord/testMnistData/train-images-idx3-ubyte.gz b/tests/ut/data/mindrecord/testMnistData/train-images-idx3-ubyte.gz index 4f27a302031116543f7a3dd95617f39eafe97c0e..80c13bf9a37aaebcefd7a2af05ae2d7d17761142 100644 GIT binary patch literal 474 zcmV<00VVz)iwFovVuW4*19Wm>X>KiPZDD6+b1i9Pcrz_^VtI6B0PUI~PsBhFg(tzF zfuIm|I6|-pB!26y1P zv)%5yZnqU#T@jIupJS^$d`iUoLc-E!_4|&ZpPE>Li~ zHPmy+MXbS*HFMvgALxewL-&JqKA1upzT<~1XW7}DEX*MJ^3pbFZ6>E`k}wP>XN{%NObv54Kc=2 ziwF~Op)j|nG%vI~Q(VUZ&AQ`p473UA3<*L%&<|uhNXfE0@y9I^U6PWDrz^m?SS?R7 zhknqZADRQk3_Rb0{m6BDUo=vzCrV8_!KlA34H`_k=C5KkNhi<`&FWRq598u_H`by^ zu}U#!gr6(w-0KqhR1sk8{{zroY$>x@K@J1@!36y@hKC$A4{kKNI7!exN&}dXz2bCz%rn?0N1p3NHH{EpK?cG0MYjtJlq5y*4>z)rQ%t|4ELW{m#m) zjjKl7Z+c&gbjS#*d0z`t`1HiZkYCk?&3~|Kve-`U}^SRyB?* zpY^Ei-7VF&oK20TyYF^%`0ZKQzFW3~2M^}=UrN&2^Mn2Uj&NOmU{-amFW7ug^Um&Q zx>l-1<@NRz7uk&8R_CgX`|YMpeHStloGrTs2l9QNFED@UP1CI?dT?k#wl=sw^QK^5 zZ4;Jir;Cb~sC+e>Z4v$>xRkJgsH_}a6#&)EW_wmkOk$?}$YdHTHH z&d>h+y^-bN+r{a8!9(Cp00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7 zKmjNK1)u;FfC5ke3P1rU00p1`6o3Ly017|>C;$bZ02KW7P%v!Ikq25X##JPoJd-yR zY*h}oRK@iQ!Q9h%9>)@U!i^Ohx6X`iU!|n$YnI==RFT;0J!`Y2=>t{Wx1w!nMcTK` z#)O1!m(gQ;swPXy3wTdNi^c~Hip}e5b2Vr|wK^zEc|VE1o@CYn>aE!HUh^U4Zrp<0 zp9$IWi4I3!(|cmV+>YdqQwIU!V9=!SVG+J4TM5{oao*k3D+y=<0DlIWg*?hl;aD zPF(uhSzqE!0q}qc;7E=G4oWluY6)S8WX2ufo3MzOXL14(6v856 zG!jFyRAOTSb!G=lRTv;KT4RdHe3el$Lw5QEG%YBVP`9C3MCXcn8O1v~hO{p!no_}~ zZA=fF$~ouJ~1uBTm3yPkGE?RwhvwCict)2^po zPrIIWJ?(nh^|b40*VC@2T~E87c0KKS+V!;SY1h-Pr(I9Go_0O$dfN50>uJ~1uBTo9 zztyh4sqIp9`uyQr#^E7TwW`-VDQ;J^*5-*0dd+H!KGVnSpk@ z$y0gwfk*T0Q8P6)PM_0d@;_8qdhF$(nquroX-Us-8*DKi3=ZxO{d2v#!6JLWTWL3Y z|5bmlLUS}!1jhx75*-fh(a`_JL^Z=|$!d34LOw@+Ux^hPV+GwV#cFv~Dp4~IW_dG0 zg{gy1$4;g1qD66)sqRTaid2#!CQJ=^!^3jC%OUl-9ii!oRBw4ixcdi1XfU`OPl%0j zyiLlD=Rd)N;*A0q5C}}mdBMSoJU~YwIFZfx3tSR*65maFK+Hl|M5;#oNQg?>OlHn- zfhi2LB!+H`4wC z6c?%!Tv3hE)4P=Sm0wDHoF~Ie06bsuJ~1uBTm3yPkGE?RwhvwCict z)2^poPrIIWJ?(nh^|b40*VC@2T~E87c0KKS+V!;SY1h-Pr(I9Go_0O$dfN50>uJ~1 zuBTm3yPkGE?Rwhv--CAjnli=QY3Uv6>1wLbi5E@Xy#epaEh2^ z@&Y0gQX_&hvP0ri5@bSj<_L^Zm?kk$W2VShmB}*0b@~NVENGcfw4rN6lZqx4O)8pH zG^uD((WIhDMU#pq6-_FdR5Yn*QhjHdRQ|%ed5g;)f1*6*v#^PE3rxEsj1x1?4}EgBs{W}?_wN5tv#&~ZTa}5zBZAbg@*SQAZvrR)1)u;F zfC5ke3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;F zfC5ke3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;F zfC5ke3P1rU00p1`6o3Ly017|>C;$bZ02F`%Q1D+s!IcH#zO@M_jlsUd`e09KK=TLQ zeA}~dPT<)4->lNS!%82#th-7VvXZcp_1y_th^)bm0KNYA1G4=1{jMu=aVI;BPqy%IvBvb@w#E*oxWXmM; z3=SB(FtlQ(#zc~NF5_l~^>hhnV$fruWxM}r6x@i@> zu=JKI?Sto5MZENT<)QiiI5X|g=OdOpzjD>+$jL8^)cy0P+0!4){LA7Gmrc3*to6Ww zno3`Hg2ga4dYv>iwAbaTx;r#vt$ta2TdH$At(IeItznw*geZSgd?%`bkMTfwL%;&$ z09$e{a8M!%&`5|zP?;lci*`hvu2{cM$&%bLb(W{ag8`q_qNtZ^eqAbi z7t|Ui3whF+IPs<&QL{z4S7Uu$Y!Cf86cdC>Ssv_)6m(XD{~z+44$lZ-xzw+^Z_97F zlD$6`%l%VQvJNza2?pt+({ZCBUCNCRx>E;sDE2UEqk6f)lH)2XR2)m>?LDa_ZONh_ zja6d`loWG~>Q3ed<%#l+0w(AQ9_3`>2t|~jp3tKRXRHXG2^Wf8CV(I*A$uatBX%Un zC2=OhXV}2_hB*_HJ4TEQZJ91Jucuf*RfA3mEgkAa6tt+k(aodnNG+41D~((_&XlpK zx6{MtW&>9+xYWY!8}2i4@rtW#+_~cxBlkkNz{-_i?k;m1oNMbW0$^hSJ0MtA!TJyO zl(6}Q6*VmVVLK7~o>)x9sxOwOvE_}uc`W#2Wg@#B|H|7khXwz}-D{QC>ax&g&%^%DQnJpewT2zaB*EkDQm){RskclOdaafSp>2sQ zpRUN>r%V;3z5FYAmb^ru2ikxyIVdsu4Twv~iOAdt8%bG- zm&w%`C@@K3gv9KOIV1yECeMuA=^9Y7pj|?(hCUJ9EJ|s#@u(rvzod9dG zvZa$Poowl3OD9`8+0x0DPPTNirIRh4Z0Tf6CtEt%(#e)iwsf+klP#TW>HL9j=?p$5 z277}Zv(_hc>GD4?`E0|@x?rbZzM~)LHFev9SIvQBe}VRid7yN-NS~(lHLvhD=swpE zFCTtg(;xKvjX6EWhlOXvuj;~t9Cd*BCxB znq_d!9G#W{9Sj;O^mnKeQMRIrMp2I%Bdtn0r!;cuE7RttTu$|#iwRuY;8M%~<10Vz z#7&*;b=QxKwZyy|tNf_-`AH?C$G@^LW<+evzNKeJjI~OWt~RaB$^1ynJDqWU<=ijV z&5Ezz(0R)kc*Fm+xKMIugyN$_SF+U-y5QvCJ3I~E1W*79KmjNK1)u;FfC5ke3P1rU z00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke3P1rU z00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke3P1rU z00p1`6o3Ly0180C_Xq`AreMr>|GjePG_a>=yf$ptwHx(AO({)xD^$PD+*6rpHSJN`_M3(qV~rjq(H_(i z^$~)1!23>Vu;GjI+LL09)6o+86ZPa-V(8cV;o|#>bF<$DQOFnrED~lBk4#!XNJ3mh zN=8aXN=8aXN=8aXN=8aXN=8aXN=8aXN=8aXN=8aXN=8aXN=8aXN=8aXN=8aXN=8aX zN=8aXN=8aXN=8aXN=8aXO7@?mWbTgwYQODIfkFL+mb?1?_Mr~jh}45lw(LLKvcoQN^fYR57X;Rg5Y|6{Ct##i(LbF{&6gDWNVnX>KiSVPa)$b1i9Pcrh(?VtI6B09}S#4geqw!iulW ofA4=*jWLTFpaopOkVq@!UCJ3XK)Z-N!avjFOw2g?lwtq?0KJ$Wy8r+H literal 140 zcmb2|=HO7xFOFkiE-6aP%+t+DOiIlu*3C?*Fw`wgsw_!mcze*1u|a`>WrK6bhNVA! z6FYwAd4KcRXU+fu4!K~8VJkD3Uf~O-7L+l8>8;!l(Ys*EA(s&%$_u8h$bqQ^-vto= diff --git a/tests/ut/python/mindrecord/test_mindrecord_base.py b/tests/ut/python/mindrecord/test_mindrecord_base.py index 576063295..7fdf1f0f9 100644 --- a/tests/ut/python/mindrecord/test_mindrecord_base.py +++ b/tests/ut/python/mindrecord/test_mindrecord_base.py @@ -203,9 +203,9 @@ def test_nlp_page_reader_tutorial(): os.remove("{}".format(x)) os.remove("{}.db".format(x)) -def test_cv_file_writer_shard_num_1000(): - """test file writer when shard num equals 1000.""" - writer = FileWriter(CV_FILE_NAME, 1000) +def test_cv_file_writer_shard_num_10(): + """test file writer when shard num equals 10.""" + writer = FileWriter(CV_FILE_NAME, 10) data = get_data("../data/mindrecord/testImageNetData/") cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int64"}, "data": {"type": "bytes"}} @@ -214,8 +214,8 @@ def test_cv_file_writer_shard_num_1000(): writer.write_raw_data(data) writer.commit() - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(3, '0')) - for x in range(1000)] + paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) + for x in range(10)] for x in paths: os.remove("{}".format(x)) os.remove("{}.db".format(x)) diff --git a/tests/ut/python/mindrecord/test_mnist_to_mr.py b/tests/ut/python/mindrecord/test_mnist_to_mr.py index c299a1f71..505b0d6b4 100644 --- a/tests/ut/python/mindrecord/test_mnist_to_mr.py +++ b/tests/ut/python/mindrecord/test_mnist_to_mr.py @@ -37,7 +37,7 @@ def read(train_name, test_name): count = count + 1 if count == 1: logger.info("data: {}".format(x)) - assert count == 60000 + assert count == 20 reader.close() count = 0 @@ -47,7 +47,7 @@ def read(train_name, test_name): count = count + 1 if count == 1: logger.info("data: {}".format(x)) - assert count == 10000 + assert count == 10 reader.close() @@ -102,10 +102,10 @@ def test_mnist_to_mindrecord_compare_data(): 't10k-images-idx3-ubyte.gz') test_labels_filename_ = os.path.join(MNIST_DIR, 't10k-labels-idx1-ubyte.gz') - train_data = _extract_images(train_data_filename_, 60000) - train_labels = _extract_labels(train_labels_filename_, 60000) - test_data = _extract_images(test_data_filename_, 10000) - test_labels = _extract_labels(test_labels_filename_, 10000) + train_data = _extract_images(train_data_filename_, 20) + train_labels = _extract_labels(train_labels_filename_, 20) + test_data = _extract_images(test_data_filename_, 10) + test_labels = _extract_labels(test_labels_filename_, 10) reader = FileReader(train_name) for x, data, label in zip(reader.get_next(), train_data, train_labels): -- GitLab