提交 8e55e52b 编写于 作者: R Ray Smith

Harder unittest that uses file i/o and string manipulation

上级 45729406
此差异已折叠。
此差异已折叠。
111
NULL 0 NULL 0
I 5 59,68,216,255,10,155,0,50,29,173 Latin 7 0 1 I # I [49 ]A
' 10 148,225,216,255,11,51,0,97,36,173 Common 2 10 2 ' # ' [27 ]p
v 3 59,68,187,197,84,173,0,32,84,218 Latin 61 0 3 v # v [76 ]a
e 3 58,64,189,200,87,154,0,32,98,188 Latin 88 0 4 e # e [65 ]a
J 5 0,64,216,255,39,242,0,30,62,234 Latin 79 0 5 J # J [4a ]A
o 3 58,66,188,200,87,151,0,32,98,185 Latin 83 0 6 o # o [6f ]a
i 3 59,69,216,255,11,141,0,54,27,173 Latin 1 0 7 i # i [69 ]a
n 3 59,68,188,202,87,187,0,25,101,208 Latin 45 0 8 n # n [6e ]a
| 0 0,67,216,255,8,73,0,80,50,173 Common 9 10 9 | # | [7c ]
- 10 105,161,122,175,49,176,0,43,56,215 Common 10 3 10 - # - [2d ]p
S 5 57,64,219,255,87,174,0,30,100,200 Latin 26 0 11 S # S [53 ]A
z 3 46,68,186,199,65,151,0,32,68,173 Latin 95 0 12 z # z [7a ]a
: 10 58,85,141,221,11,69,0,67,38,173 Common 13 6 13 : # : [3a ]p
# 10 37,84,200,255,99,221,0,41,109,266 Common 14 4 14 # # # [23 ]p
6 8 58,66,219,255,87,156,0,54,104,173 Common 15 2 15 6 # 6 [36 ]0
% 10 27,67,205,255,105,257,0,49,117,288 Common 16 4 16 % # % [25 ]p
5 8 12,66,199,255,82,160,0,36,103,173 Common 17 2 17 5 # 5 [35 ]0
0 8 58,66,187,255,88,164,0,45,103,180 Common 18 2 18 0 # 0 [30 ]0
@ 10 0,65,211,255,99,286,0,39,117,291 Common 19 10 19 @ # @ [40 ]p
p 3 0,47,192,226,87,180,0,25,100,200 Latin 68 0 20 p # p [70 ]a
a 3 58,65,186,200,85,164,0,26,97,185 Latin 67 0 21 a # a [61 ]a
r 3 59,68,186,202,58,173,0,40,69,180 Latin 40 0 22 r # r [72 ]a
m 3 56,68,189,202,108,280,0,25,117,306 Latin 38 0 23 m # m [6d ]a
F 5 57,68,216,255,68,210,0,31,77,209 Latin 29 0 24 F # F [46 ]A
u 3 57,65,187,202,85,184,0,39,100,208 Latin 85 0 25 u # u [75 ]a
s 3 58,65,192,200,78,147,0,30,91,173 Latin 11 0 26 s # s [73 ]a
B 5 62,68,216,255,91,227,0,27,106,227 Latin 46 0 27 B # B [42 ]A
» 10 0,133,146,235,63,284,0,32,71,294 Common 28 10 49 » # » [bb ]p
f 3 0,68,216,255,54,175,0,42,55,193 Latin 24 0 29 f # f [66 ]a
d 3 57,65,216,255,88,174,0,28,100,200 Latin 59 0 30 d # d [64 ]a
c 3 58,64,192,200,80,153,0,36,88,178 Latin 33 0 31 c # c [63 ]a
h 3 59,68,216,255,87,187,0,25,101,208 Latin 55 0 32 h # h [68 ]a
C 5 58,65,219,255,87,192,0,32,107,209 Latin 31 0 33 C # C [43 ]A
t 3 58,66,206,254,57,167,0,47,59,180 Latin 37 0 34 t # t [74 ]a
L 5 59,68,216,255,64,193,0,31,74,206 Latin 41 0 35 L # L [4c ]A
? 10 40,67,219,255,59,144,0,65,77,188 Common 36 10 36 ? # ? [3f ]p
T 5 59,68,216,255,85,227,0,47,88,236 Latin 34 0 37 T # T [54 ]A
M 5 57,68,216,255,99,301,0,35,117,286 Latin 23 0 38 M # M [4d ]A
y 3 0,47,187,202,87,199,0,25,87,230 Latin 100 0 39 y # y [79 ]a
R 5 57,68,216,255,88,227,0,27,104,232 Latin 22 0 40 R # R [52 ]A
l 3 59,68,216,255,11,147,0,56,27,173 Latin 35 0 41 l # l [6c ]a
~ 0 91,229,135,255,73,174,0,41,0,200 Common 42 10 42 ~ # ~ [7e ]
< 0 29,102,173,255,69,184,0,50,90,256 Common 43 10 76 < # < [3c ]
® 0 28,163,209,255,83,223,0,48,92,257 Common 44 10 44 ® # ® [ae ]
N 5 59,68,216,255,87,262,0,27,104,249 Latin 8 0 45 N # N [4e ]A
b 3 58,64,216,255,87,180,0,25,100,200 Latin 27 0 46 b # b [62 ]a
k 3 57,68,216,255,85,177,0,35,93,198 Latin 101 0 47 k # k [6b ]a
[ 10 8,64,216,255,39,136,0,80,55,173 Common 48 10 70 [ # [ [5b ]p
« 10 26,133,148,235,63,279,0,35,71,281 Common 49 10 28 « # « [ab ]p
1 8 49,69,192,255,45,128,0,66,74,173 Common 50 2 50 1 # 1 [31 ]0
, 10 14,46,79,115,17,78,0,58,30,173 Common 51 6 51 , # , [2c ]p
. 10 26,67,73,112,13,51,0,67,30,173 Common 52 6 52 . # . [2e ]p
” 10 141,233,216,255,59,141,0,87,66,298 Common 53 10 53 " # ” [201d ]p
g 3 0,43,188,212,88,176,0,32,100,210 Latin 93 0 54 g # g [67 ]a
H 5 59,68,216,255,91,258,0,27,107,244 Latin 32 0 55 H # H [48 ]A
$ 0 24,63,229,255,85,174,0,36,106,174 Common 56 4 56 $ # $ [24 ]
( 10 0,64,216,255,42,118,0,97,61,173 Common 57 10 94 ( # ( [28 ]p
+ 0 54,102,171,253,90,176,0,37,103,213 Common 58 3 58 + # + [2b ]
D 5 59,68,216,255,93,230,0,27,107,236 Latin 30 0 59 D # D [44 ]A
w 3 59,68,187,195,108,235,0,32,117,286 Latin 103 0 60 w # w [77 ]a
V 5 59,68,216,255,103,207,0,41,101,245 Latin 3 0 61 V # V [56 ]A
£ 0 0,135,219,255,64,201,0,55,61,298 Common 62 4 62 £ # £ [a3 ]
4 8 0,68,198,255,93,161,0,41,96,173 Common 63 2 63 4 # 4 [34 ]0
9 8 0,66,200,255,89,156,0,39,104,173 Common 64 2 64 9 # 9 [39 ]0
Q 5 7,64,219,255,91,205,0,30,106,227 Latin 96 0 65 Q # Q [51 ]A
& 10 53,64,194,255,108,232,0,47,112,239 Common 66 10 66 & # & [26 ]p
A 5 52,68,216,255,100,216,0,17,98,231 Latin 21 0 67 A # A [41 ]A
P 5 57,68,216,255,87,225,0,32,97,230 Latin 20 0 68 P # P [50 ]A
¢ 0 14,158,190,255,56,144,0,72,61,270 Common 69 4 69 ¢ # ¢ [a2 ]
] 10 8,64,216,255,39,129,0,44,55,173 Common 70 10 48 ] # ] [5d ]p
3 8 0,66,196,255,84,158,0,32,103,173 Common 71 2 71 3 # 3 [33 ]0
2 8 30,69,194,255,80,160,0,27,97,173 Common 72 2 72 2 # 2 [32 ]0
© 0 28,125,209,255,118,232,0,32,119,257 Common 73 10 73 © # © [a9 ]
8 8 57,66,219,255,88,162,0,41,103,174 Common 74 2 74 8 # 8 [38 ]0
/ 10 0,65,219,255,59,228,0,36,62,238 Common 75 6 75 / # / [2f ]p
> 0 29,102,173,255,78,184,0,50,90,256 Common 76 10 43 > # > [3e ]
X 5 59,68,216,255,94,275,0,25,93,256 Latin 86 0 77 X # X [58 ]A
é 3 0,64,222,255,87,384,0,32,98,391 Latin 78 0 78 é # é [e9 ]a
j 3 0,47,216,255,36,145,0,49,50,173 Latin 5 0 79 j # j [6a ]a
; 10 14,56,131,221,17,93,0,58,38,173 Common 80 10 80 ; # ; [3b ]p
7 8 12,68,196,255,72,160,0,60,75,173 Common 81 2 81 7 # 7 [37 ]0
€ 0 32,68,209,255,97,238,0,49,103,293 Common 82 4 82 € # € [20ac ]
O 5 57,64,219,255,91,209,0,34,106,233 Latin 6 0 83 O # O [4f ]A
¥ 0 59,75,209,255,91,238,0,52,91,270 Common 84 4 84 ¥ # ¥ [a5 ]
U 5 58,64,216,255,91,214,0,39,106,220 Latin 25 0 85 U # U [55 ]A
x 3 59,68,187,201,85,189,0,25,84,218 Latin 77 0 86 x # x [78 ]a
} 10 0,44,216,255,54,148,0,56,59,173 Common 87 10 97 } # } [7d ]p
E 5 59,68,216,255,68,210,0,31,80,219 Latin 4 0 88 E # E [45 ]A
§ 0 9,66,219,255,82,207,0,86,93,293 Common 89 10 89 § # § [a7 ]
= 0 74,139,144,199,90,186,0,32,103,224 Common 90 10 90 = # = [3d ]
! 10 41,67,216,255,11,87,0,71,50,173 Common 91 10 91 ! # ! [21 ]p
’ 10 141,233,212,255,17,78,0,109,30,298 Common 92 10 92 ' # ’ [2019 ]p
G 5 58,64,219,255,91,230,0,30,106,230 Latin 54 0 93 G # G [47 ]A
) 10 0,64,216,255,42,119,0,53,61,173 Common 94 10 57 ) # ) [29 ]p
Z 5 64,68,216,255,72,218,0,30,77,236 Latin 12 0 95 Z # Z [5a ]A
q 3 0,47,192,202,88,196,0,30,100,200 Latin 65 0 96 q # q [71 ]a
{ 10 0,44,216,255,54,148,0,71,59,173 Common 97 10 87 { # { [7b ]p
“ 10 141,233,216,255,56,133,0,172,66,298 Common 98 10 98 " # “ [201c ]p
— 10 110,155,132,167,126,297,0,23,136,298 Common 99 10 99 - # — [2014 ]p
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 39 0 100 Y # Y [59 ]A
K 5 57,68,216,255,92,225,0,37,103,216 Latin 47 0 101 K # K [4b ]A
* 10 78,183,188,255,49,134,0,60,53,173 Common 102 10 102 * # * [2a ]p
W 5 54,68,216,255,106,314,0,41,117,318 Latin 60 0 103 W # W [57 ]A
" 10 151,225,216,255,52,115,0,71,71,173 Common 104 10 104 " # " [22 ]p
\ 10 0,67,219,255,28,250,0,71,62,261 Common 105 10 105 \ # \ [5c ]p
° 0 66,247,209,255,22,399,0,98,66,409 Common 106 4 106 ° # ° [b0 ]
fi 3 0,71,216,255,87,202,0,28,105,199 Latin 107 0 107 fi # fi [fb01 ]a
‘ 10 141,233,210,255,17,64,0,216,30,298 Common 108 10 108 ' # ‘ [2018 ]p
_ 10 0,50,0,64,73,248,0,29,75,259 Common 109 10 109 _ # _ [5f ]p
fl 3 0,71,216,255,87,219,0,28,105,236 Latin 110 0 110 fl # fl [fb02 ]a
因为 它太大了无法显示 source diff 。你可以改为 查看blob
因为 它太大了无法显示 source diff 。你可以改为 查看blob
此差异已折叠。
因为 它太大了无法显示 source diff 。你可以改为 查看blob
226
NULL 0 NULL 0
Joined 7 0,69,188,255,486,1218,0,30,486,1188 Latin 22 0 64 Joined # Joined [4a 6f 69 6e 65 64 ]a
|Broken|0|1 f 0,69,186,255,892,2138,0,80,892,2058 Common 101 10 101 |Broken|0|1 # Broken
T 5 59,68,216,255,85,227,0,47,88,236 Latin 46 0 3 T # T [54 ]A
u 3 57,65,187,202,85,184,0,39,100,208 Latin 47 0 4 u # u [75 ]a
d 3 57,65,216,255,88,174,0,28,100,200 Latin 23 0 5 d # d [64 ]a
o 3 58,66,188,200,87,151,0,32,98,185 Latin 28 0 6 o # o [6f ]a
S 5 57,64,219,255,87,174,0,30,100,200 Latin 15 0 7 S # S [53 ]A
C 5 58,65,219,255,87,192,0,32,107,209 Latin 31 0 8 C # C [43 ]A
R 5 57,68,216,255,88,227,0,27,104,232 Latin 20 0 9 R # R [52 ]A
A 5 52,68,216,255,100,216,0,17,98,231 Latin 14 0 10 A # A [41 ]A
P 5 57,68,216,255,87,225,0,32,97,230 Latin 37 0 11 P # P [50 ]A
g 3 0,43,188,212,88,176,0,32,100,210 Latin 76 0 12 g # g [67 ]a
i 3 59,69,216,255,11,141,0,54,27,173 Latin 27 0 13 i # i [69 ]a
a 3 58,65,186,200,85,164,0,26,97,185 Latin 10 0 14 a # a [61 ]a
s 3 58,65,192,200,78,147,0,30,91,173 Latin 7 0 15 s # s [73 ]a
2 8 30,69,194,255,80,160,0,27,97,173 Common 16 2 16 2 # 2 [32 ]0
0 8 58,66,187,255,88,164,0,45,103,180 Common 17 2 17 0 # 0 [30 ]0
8 8 57,66,219,255,88,162,0,41,103,174 Common 18 2 18 8 # 8 [38 ]0
M 5 57,68,216,255,99,301,0,35,117,286 Latin 35 0 19 M # M [4d ]A
r 3 59,68,186,202,58,173,0,40,69,180 Latin 9 0 20 r # r [72 ]a
y 3 0,47,187,202,87,199,0,25,87,230 Latin 109 0 21 y # y [79 ]a
j 3 0,47,216,255,36,145,0,49,50,173 Latin 64 0 22 j # j [6a ]a
D 5 59,68,216,255,93,230,0,27,107,236 Latin 5 0 23 D # D [44 ]A
w 3 59,68,187,195,108,235,0,32,117,286 Latin 108 0 24 w # w [77 ]a
n 3 59,68,188,202,87,187,0,25,101,208 Latin 49 0 25 n # n [6e ]a
É 5 59,68,232,255,68,314,0,31,80,325 Latin 59 0 26 É # É [c9 ]A
I 5 59,68,216,255,10,155,0,50,29,173 Latin 13 0 27 I # I [49 ]A
O 5 57,64,219,255,91,209,0,34,106,233 Latin 6 0 28 O # O [4f ]A
“ 10 141,233,216,255,56,133,0,172,66,298 Common 29 10 29 " # “ [201c ]p
l 3 59,68,216,255,11,147,0,56,27,173 Latin 48 0 30 l # l [6c ]a
c 3 58,64,192,200,80,153,0,36,88,178 Latin 8 0 31 c # c [63 ]a
e 3 58,64,189,200,87,154,0,32,98,188 Latin 33 0 32 e # e [65 ]a
E 5 59,68,216,255,68,210,0,31,80,219 Latin 32 0 33 E # E [45 ]A
â 3 0,64,222,255,85,256,0,26,97,256 Latin 34 0 34 â # â [e2 ]a
m 3 56,68,189,202,108,280,0,25,117,306 Latin 19 0 35 m # m [6d ]a
h 3 59,68,216,255,87,187,0,25,101,208 Latin 97 0 36 h # h [68 ]a
p 3 0,47,192,226,87,180,0,25,100,200 Latin 11 0 37 p # p [70 ]a
B 5 62,68,216,255,91,227,0,27,106,227 Latin 45 0 38 B # B [42 ]A
. 10 26,67,73,112,13,51,0,67,30,173 Common 39 6 39 . # . [2e ]p
£ 0 0,135,219,255,64,201,0,55,61,298 Common 40 4 40 £ # £ [a3 ]
ó 3 0,64,222,255,87,192,0,32,98,197 Latin 73 0 41 ó # ó [f3 ]a
1 8 49,69,192,255,45,128,0,66,74,173 Common 42 2 42 1 # 1 [31 ]0
6 8 58,66,219,255,87,156,0,54,104,173 Common 43 2 43 6 # 6 [36 ]0
4 8 0,68,198,255,93,161,0,41,96,173 Common 44 2 44 4 # 4 [34 ]0
b 3 58,64,216,255,87,180,0,25,100,200 Latin 38 0 45 b # b [62 ]a
t 3 58,66,206,254,57,167,0,47,59,180 Latin 3 0 46 t # t [74 ]a
U 5 58,64,216,255,91,214,0,39,106,220 Latin 4 0 47 U # U [55 ]A
L 5 59,68,216,255,64,193,0,31,74,206 Latin 30 0 48 L # L [4c ]A
N 5 59,68,216,255,87,262,0,27,104,249 Latin 25 0 49 N # N [4e ]A
V 5 59,68,216,255,103,207,0,41,101,245 Latin 56 0 50 V # V [56 ]A
” 10 141,233,216,255,59,141,0,87,66,298 Common 51 10 51 " # ” [201d ]p
) 10 0,64,216,255,42,119,0,53,61,173 Common 52 10 86 ) # ) [29 ]p
, 10 14,46,79,115,17,78,0,58,30,173 Common 53 6 53 , # , [2c ]p
k 3 57,68,216,255,85,177,0,35,93,198 Latin 57 0 54 k # k [6b ]a
: 10 58,85,141,221,11,69,0,67,38,173 Common 55 6 55 : # : [3a ]p
v 3 59,68,187,197,84,173,0,32,84,218 Latin 50 0 56 v # v [76 ]a
K 5 57,68,216,255,92,225,0,37,103,216 Latin 54 0 57 K # K [4b ]A
í 3 62,69,222,255,40,279,0,54,47,286 Latin 92 0 58 í # í [ed ]a
é 3 0,64,222,255,87,384,0,32,98,391 Latin 26 0 59 é # é [e9 ]a
ê 3 0,64,222,255,87,409,0,32,98,407 Latin 99 0 60 ê # ê [ea ]a
Ç 5 0,64,217,255,87,286,0,32,107,296 Latin 82 0 61 Ç # Ç [c7 ]A
@ 10 0,65,211,255,99,286,0,39,117,291 Common 62 10 62 @ # @ [40 ]p
Ú 5 0,64,232,255,91,294,0,39,106,291 Latin 107 0 63 Ú # Ú [da ]A
J 5 0,64,216,255,39,242,0,30,62,234 Latin 22 0 64 J # J [4a ]A
€ 0 32,68,209,255,97,238,0,49,103,293 Common 65 4 65 € # € [20ac ]
9 8 0,66,200,255,89,156,0,39,104,173 Common 66 2 66 9 # 9 [39 ]0
5 8 12,66,199,255,82,160,0,36,103,173 Common 67 2 67 5 # 5 [35 ]0
& 10 53,64,194,255,108,232,0,47,112,239 Common 68 10 68 & # & [26 ]p
x 3 59,68,187,201,85,189,0,25,84,218 Latin 89 0 69 x # x [78 ]a
/ 10 0,65,219,255,59,228,0,36,62,238 Common 70 6 70 / # / [2f ]p
² 0 3,192,209,255,50,248,0,105,0,293 Common 71 2 71 2 # ² [b2 ]
F 5 57,68,216,255,68,210,0,31,77,209 Latin 84 0 72 F # F [46 ]A
Ó 5 0,64,232,255,91,276,0,34,106,286 Latin 41 0 73 Ó # Ó [d3 ]A
3 8 0,66,196,255,84,158,0,32,103,173 Common 74 2 74 3 # 3 [33 ]0
z 3 46,68,186,199,65,151,0,32,68,173 Latin 112 0 75 z # z [7a ]a
G 5 58,64,219,255,91,230,0,30,106,230 Latin 12 0 76 G # G [47 ]A
á 3 0,64,222,255,85,414,0,26,97,412 Latin 100 0 77 á # á [e1 ]a
- 10 105,161,122,175,49,176,0,43,56,215 Common 78 3 78 - # - [2d ]p
? 10 40,67,219,255,59,144,0,65,77,188 Common 79 10 79 ? # ? [3f ]p
! 10 41,67,216,255,11,87,0,71,50,173 Common 80 10 80 ! # ! [21 ]p
q 3 0,47,192,202,88,196,0,30,100,200 Latin 116 0 81 q # q [71 ]a
ç 3 0,31,182,232,80,299,0,36,96,309 Latin 61 0 82 ç # ç [e7 ]a
ã 3 0,64,224,255,85,279,0,26,97,289 Latin 96 0 83 ã # ã [e3 ]a
f 3 0,68,216,255,54,175,0,42,55,193 Latin 72 0 84 f # f [66 ]a
+ 0 54,102,171,253,90,176,0,37,103,213 Common 85 3 85 + # + [2b ]
( 10 0,64,216,255,42,118,0,97,61,173 Common 86 10 52 ( # ( [28 ]p
' 10 148,225,216,255,11,51,0,97,36,173 Common 87 10 87 ' # ' [27 ]p
; 10 14,56,131,221,17,93,0,58,38,173 Common 88 10 88 ; # ; [3b ]p
X 5 59,68,216,255,94,275,0,25,93,256 Latin 69 0 89 X # X [58 ]A
* 10 78,183,188,255,49,134,0,60,53,173 Common 90 10 90 * # * [2a ]p
º 3 64,187,188,255,51,189,0,81,64,293 Latin 91 0 91 o # º [ba ]a
Í 5 64,68,232,255,35,197,0,48,55,207 Latin 58 0 92 Í # Í [cd ]A
³ 0 0,192,209,255,48,268,0,99,0,293 Common 93 2 93 3 # ³ [b3 ]
› 10 64,101,142,215,32,100,0,84,37,173 Common 94 10 94 › # › [203a ]p
ª 3 64,187,207,255,51,286,0,71,62,296 Latin 95 0 95 a # ª [aa ]a
à 5 6,68,232,255,100,204,0,25,98,245 Latin 83 0 96 à # à [c3 ]A
H 5 59,68,216,255,91,258,0,27,107,244 Latin 36 0 97 H # H [48 ]A
# 10 37,84,200,255,99,221,0,41,109,266 Common 98 4 98 # # # [23 ]p
Ê 5 0,68,232,255,72,266,0,31,80,276 Latin 60 0 99 Ê # Ê [ca ]A
Á 5 64,68,232,255,100,203,0,29,98,245 Latin 77 0 100 Á # Á [c1 ]A
| 0 0,67,216,255,8,73,0,80,50,173 Common 101 10 101 | # | [7c ]
" 10 151,225,216,255,52,115,0,71,71,173 Common 102 10 102 " # " [22 ]p
> 0 29,102,173,255,78,184,0,50,90,256 Common 103 10 111 > # > [3e ]
à 3 0,64,222,255,85,407,0,26,97,407 Latin 104 0 104 à # à [e0 ]a
õ 3 58,66,224,255,87,194,0,32,98,204 Latin 105 0 105 õ # õ [f5 ]a
« 10 26,133,148,235,63,279,0,35,71,281 Common 106 10 126 « # « [ab ]p
ú 3 0,65,222,255,85,212,0,39,100,212 Latin 63 0 107 ú # ú [fa ]a
W 5 54,68,216,255,106,314,0,41,117,318 Latin 24 0 108 W # W [57 ]A
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 21 0 109 Y # Y [59 ]A
7 8 12,68,196,255,72,160,0,60,75,173 Common 110 2 110 7 # 7 [37 ]0
< 0 29,102,173,255,69,184,0,50,90,256 Common 111 10 103 < # < [3c ]
Z 5 64,68,216,255,72,218,0,30,77,236 Latin 75 0 112 Z # Z [5a ]A
¹ 0 64,192,209,255,24,279,1,119,0,293 Common 113 2 113 1 # ¹ [b9 ]
ü 3 0,65,219,255,85,220,0,39,100,225 Latin 114 0 114 ü # ü [fc ]a
_ 10 0,50,0,64,73,248,0,29,75,259 Common 115 10 115 _ # _ [5f ]p
Q 5 7,64,219,255,91,205,0,30,106,227 Latin 81 0 116 Q # Q [51 ]A
… 10 60,143,79,232,101,332,0,45,107,337 Common 117 10 117 ... # … [2026 ]p
¡ 10 0,66,185,255,11,176,0,125,49,293 Common 118 10 118 ¡ # ¡ [a1 ]p
$ 0 24,63,229,255,85,174,0,36,106,174 Common 119 4 119 $ # $ [24 ]
© 0 28,125,209,255,118,232,0,32,119,257 Common 120 10 120 © # © [a9 ]
[ 10 8,64,216,255,39,136,0,80,55,173 Common 121 10 123 [ # [ [5b ]p
% 10 27,67,205,255,105,257,0,49,117,288 Common 122 4 122 % # % [25 ]p
] 10 8,64,216,255,39,129,0,44,55,173 Common 123 10 121 ] # ] [5d ]p
= 0 74,139,144,199,90,186,0,32,103,224 Common 124 10 124 = # = [3d ]
₂ 0 10,67,113,172,50,118,0,105,77,293 Common 125 2 125 2 # ₂ [2082 ]
» 10 0,133,146,235,63,284,0,32,71,294 Common 126 10 106 » # » [bb ]p
⁴ 0 115,163,227,255,63,131,0,101,77,293 Common 127 2 127 4 # ⁴ [2074 ]
ô 3 58,66,222,255,87,192,0,32,98,202 Latin 128 0 128 ô # ô [f4 ]a
° 0 66,247,209,255,22,399,0,98,66,409 Common 129 4 129 ° # ° [b0 ]
₄ 0 12,67,115,170,62,131,0,97,77,293 Common 130 2 130 4 # ₄ [2084 ]
₃ 0 8,67,113,172,52,106,0,103,77,293 Common 131 2 131 3 # ₃ [2083 ]
₁ 0 10,67,113,172,36,78,0,108,77,293 Common 132 2 132 1 # ₁ [2081 ]
fl 3 0,68,216,255,82,408,0,42,82,366 Latin 72 0 84 fl # fl [66 6c ]a
fi 3 0,69,216,255,82,408,0,42,82,366 Latin 72 0 84 fi # fi [66 69 ]a
... 10 26,67,73,112,90,586,0,67,90,519 Common 39 6 39 ... # ... [2e 2e 2e ]p
ff 3 0,68,216,255,110,428,0,42,110,386 Latin 72 0 84 ff # ff [66 66 ]a
⁸ 0 124,151,229,255,56,102,0,53,75,173 Common 137 2 137 8 # ⁸ [2078 ]
⁶ 0 124,151,229,255,56,99,0,56,77,173 Common 138 2 138 6 # ⁶ [2076 ]
⁹ 0 126,153,230,255,56,104,0,57,77,173 Common 139 2 139 9 # ⁹ [2079 ]
⁵ 0 124,153,227,255,50,104,0,51,75,173 Common 140 2 140 5 # ⁵ [2075 ]
⁷ 0 128,153,227,255,52,106,0,58,77,173 Common 141 2 141 7 # ⁷ [2077 ]
⁰ 0 124,151,229,255,56,102,0,53,77,173 Common 142 2 142 0 # ⁰ [2070 ]
₆ 0 10,65,118,172,56,99,0,56,77,173 Common 143 2 143 6 # ₆ [2086 ]
₉ 0 10,65,118,172,56,104,0,57,77,173 Common 144 2 144 9 # ₉ [2089 ]
₀ 0 8,65,119,172,56,102,0,53,77,173 Common 145 2 145 0 # ₀ [2080 ]
₅ 0 8,65,113,170,50,98,0,51,77,173 Common 146 2 146 5 # ₅ [2085 ]
₈ 0 8,65,118,172,59,102,0,53,75,173 Common 147 2 147 8 # ₈ [2088 ]
ffi 3 0,69,216,255,137,601,0,42,137,559 Latin 72 0 84 ffi # ffi [66 66 69 ]a
₇ 0 10,67,115,170,52,92,0,60,77,173 Common 149 2 149 7 # ₇ [2087 ]
Th 7 59,68,216,255,189,491,0,47,189,444 Latin 46 0 3 Th # Th [54 68 ]a
ft 3 0,68,206,255,114,415,0,42,114,373 Latin 72 0 84 ft # ft [66 74 ]a
ffl 3 0,68,216,255,137,601,0,42,137,559 Latin 72 0 84 ffl # ffl [66 66 6c ]a
NJ 5 0,68,216,255,166,510,0,27,166,483 Latin 25 0 49 NJ # NJ [4e 4a ]A
ij 3 0,69,216,255,77,400,0,54,77,346 Latin 27 0 13 ij # ij [69 6a ]a
tt 3 58,66,206,254,118,407,0,47,118,360 Latin 3 0 46 tt # tt [74 74 ]a
ti 3 58,69,206,255,86,400,0,47,86,353 Latin 3 0 46 ti # ti [74 69 ]a
it 3 58,69,206,255,86,407,0,54,86,353 Latin 27 0 13 it # it [69 74 ]a
sc 3 58,65,192,200,179,381,0,30,179,351 Latin 7 0 15 sc # sc [73 63 ]a
rt 3 58,68,186,254,128,400,0,40,128,360 Latin 9 0 20 rt # rt [72 74 ]a
es 3 58,65,189,200,189,393,0,32,189,361 Latin 33 0 32 es # es [65 73 ]a
ee 3 58,64,189,200,196,408,0,32,196,376 Latin 33 0 32 ee # ee [65 65 ]a
th 3 58,68,206,255,160,435,0,47,160,388 Latin 3 0 46 th # th [74 68 ]a
st 3 58,66,192,254,150,383,0,30,150,353 Latin 7 0 15 st # st [73 74 ]a
ch 3 58,68,192,255,189,422,0,36,189,386 Latin 8 0 31 ch # ch [63 68 ]a
et 3 58,66,189,254,157,400,0,32,157,368 Latin 33 0 32 et # et [65 74 ]a
sh 3 58,68,192,255,192,411,0,30,192,381 Latin 7 0 15 sh # sh [73 68 ]a
il 3 59,69,216,255,54,400,0,54,54,346 Latin 27 0 13 il # il [69 6c ]a
ot 3 58,66,188,254,157,397,0,32,157,365 Latin 28 0 6 ot # ot [6f 74 ]a
ge 3 0,64,188,212,198,430,0,32,198,398 Latin 76 0 12 ge # ge [67 65 ]a
sp 3 0,65,192,226,191,403,0,30,191,373 Latin 7 0 15 sp # sp [73 70 ]a
di 3 57,69,216,255,127,401,0,28,127,373 Latin 23 0 5 di # di [64 69 ]a
fü 3 0,68,216,255,155,460,0,42,155,418 Latin 72 0 84 fü # fü [66 fc ]a
ss 3 58,65,192,200,182,376,0,30,182,346 Latin 7 0 15 ss # ss [73 73 ]a
pp 3 0,47,192,226,200,425,0,25,200,400 Latin 11 0 37 pp # pp [70 70 ]a
pt 3 0,66,192,254,159,405,0,25,159,380 Latin 11 0 37 pt # pt [70 74 ]a
sl 3 58,68,192,255,118,376,0,30,118,346 Latin 7 0 15 sl # sl [73 6c ]a
sf 3 0,68,192,255,146,396,0,30,146,366 Latin 7 0 15 sf # sf [73 66 ]a
cc 3 58,64,192,200,176,392,0,36,176,356 Latin 8 0 31 cc # cc [63 63 ]a
ll 3 59,68,216,255,54,402,0,56,54,346 Latin 48 0 30 ll # ll [6c 6c ]a
ct 3 58,66,192,254,147,394,0,36,147,358 Latin 8 0 31 ct # ct [63 74 ]a
rr 3 59,68,186,202,138,400,0,40,138,360 Latin 9 0 20 rr # rr [72 72 ]a
aa 3 58,65,186,200,194,396,0,26,194,370 Latin 10 0 14 aa # aa [61 61 ]a
fu 3 0,68,187,255,155,443,0,42,155,401 Latin 72 0 84 fu # fu [66 75 ]a
ii 3 59,69,216,255,54,400,0,54,54,346 Latin 27 0 13 ii # ii [69 69 ]a
ph 3 0,68,192,255,201,433,0,25,201,408 Latin 11 0 37 ph # ph [70 68 ]a
gy 3 0,47,187,212,187,472,0,32,187,440 Latin 76 0 12 gy # gy [67 79 ]a
fr 3 0,68,186,255,124,415,0,42,124,373 Latin 72 0 84 fr # fr [66 72 ]a
dt 3 57,66,206,255,159,408,0,28,159,380 Latin 23 0 5 dt # dt [64 74 ]a
cti 3 58,69,192,255,174,567,0,36,174,531 Latin 8 0 31 cti # cti [63 74 69 ]a
oo 3 58,66,188,200,196,402,0,32,196,370 Latin 28 0 6 oo # oo [6f 6f ]a
sti 3 58,69,192,255,177,556,0,30,177,526 Latin 7 0 15 sti # sti [73 74 69 ]a
sk 3 57,68,192,255,184,401,0,30,184,371 Latin 7 0 15 sk # sk [73 6b ]a
cs 3 58,65,192,200,179,387,0,36,179,351 Latin 8 0 31 cs # cs [63 73 ]a
ooo 3 58,66,188,200,294,587,0,32,294,555 Latin 28 0 6 ooo # ooo [6f 6f 6f ]a
ty 3 0,66,187,254,146,457,0,47,146,410 Latin 3 0 46 ty # ty [74 79 ]a
tz 3 46,68,186,254,127,400,0,47,127,353 Latin 3 0 46 tz # tz [74 7a ]a
fk 3 0,68,216,255,148,433,0,42,148,391 Latin 72 0 84 fk # fk [66 6b ]a
ck 3 57,68,192,255,181,412,0,36,181,376 Latin 8 0 31 ck # ck [63 6b ]a
gg 3 0,43,188,212,200,452,0,32,200,420 Latin 76 0 12 gg # gg [67 67 ]a
°C 5 58,247,209,255,173,716,0,98,173,618 Common 129 4 129 °C # °C [b0 43 ]A
!? 10 40,67,216,255,127,432,0,71,127,361 Common 80 10 80 !? # !? [21 3f ]p
!! 10 41,67,216,255,100,417,0,71,100,346 Common 80 10 80 !! # !! [21 21 ]p
Qu 7 7,65,187,255,206,465,0,30,206,435 Latin 81 0 116 Qu # Qu [51 75 ]a
ry 3 0,68,186,202,156,450,0,40,156,410 Latin 9 0 20 ry # ry [72 79 ]a
gj 3 0,47,188,255,150,415,0,32,150,383 Latin 76 0 12 gj # gj [67 6a ]a
bt 3 58,66,206,255,159,405,0,25,159,380 Latin 38 0 45 bt # bt [62 74 ]a
sch 3 58,68,192,255,280,589,0,30,280,559 Latin 7 0 15 sch # sch [73 63 68 ]a
SS 5 57,64,219,255,200,430,0,30,200,400 Latin 15 0 7 SS # SS [53 53 ]A
AND 5 52,68,216,255,309,733,0,17,309,716 Latin 14 0 10 AND # AND [41 4e 44 ]A
ET 5 59,68,216,255,168,486,0,31,168,455 Latin 32 0 33 ET # ET [45 54 ]A
UND 5 58,68,216,255,317,744,0,39,317,705 Latin 4 0 47 UND # UND [55 4e 44 ]A
fb 3 0,68,216,255,155,435,0,42,155,393 Latin 72 0 84 fb # fb [66 62 ]a
fj 3 0,68,216,255,105,408,0,42,105,366 Latin 72 0 84 fj # fj [66 6a ]a
nj 3 0,68,188,255,151,406,0,25,151,381 Latin 49 0 25 nj # nj [6e 6a ]a
ffb 3 0,68,216,255,210,628,0,42,210,586 Latin 72 0 84 ffb # ffb [66 66 62 ]a
fh 3 0,68,216,255,156,443,0,42,156,401 Latin 72 0 84 fh # fh [66 68 ]a
or 3 58,68,186,202,167,397,0,32,167,365 Latin 28 0 6 or # or [6f 72 ]a
on 3 58,68,188,202,199,425,0,32,199,393 Latin 28 0 6 on # on [6f 6e ]a
of 3 0,68,188,255,153,410,0,32,153,378 Latin 28 0 6 of # of [6f 66 ]a
om 3 56,68,188,202,215,523,0,32,215,491 Latin 28 0 6 om # om [6f 6d ]a
op 3 0,66,188,226,198,417,0,32,198,385 Latin 28 0 6 op # op [6f 70 ]a
ou 3 57,66,187,202,198,425,0,32,198,393 Latin 28 0 6 ou # ou [6f 75 ]a
fft 3 0,68,206,255,169,608,0,42,169,566 Latin 72 0 84 fft # fft [66 66 74 ]a
sb 3 58,65,192,255,191,403,0,30,191,373 Latin 7 0 15 sb # sb [73 62 ]a
the 3 58,68,189,255,258,623,0,47,258,576 Latin 3 0 46 the # the [74 68 65 ]a
4
NULL 0 NULL 0
i 3 59,69,216,255,11,141,0,54,27,173 Latin 1 0 1 i # i [69 ]a
f 3 0,68,216,255,54,175,0,42,55,193 Latin 2 0 2 f # f [66 ]a
fi 3 0,71,216,255,87,202,0,28,105,199 Latin 3 0 3 fi # fi [fb01 ]a
// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "unicharcompress.h"
#include "gunit.h"
#include "serialis.h"
#include "printf.h"
namespace tesseract {
namespace {
// Test fixture that loads a unicharset from the test data directory, computes
// its compressed encoding, and provides checks on the resulting encoding.
class UnicharcompressTest : public ::testing::Test {
 protected:
  // Loads the unicharset named unicharset_name from the testdata directory,
  // computes its compressed encoding using the radical-stroke table, records
  // the encoding of the null char, and writes a text dump of the encoding to
  // the test tmp directory.
  void LoadUnicharset(const string& unicharset_name) {
    string radical_stroke_file =
        file::JoinPath(FLAGS_test_srcdir, "langdata/radical-stroke.txt");
    string unicharset_file =
        file::JoinPath(FLAGS_test_srcdir, "testdata", unicharset_name);
    string uni_data;
    CHECK_OK(file::GetContents(unicharset_file, &uni_data, file::Defaults()));
    string radical_data;
    CHECK_OK(file::GetContents(radical_stroke_file, &radical_data,
                               file::Defaults()));
    CHECK(
        unicharset_.load_from_inmemory_file(uni_data.data(), uni_data.size()));
    STRING radical_str(radical_data.c_str());
    // With special codes present the null char is UNICHAR_BROKEN, otherwise
    // it is the id one past the end of the unicharset.
    null_char_ =
        unicharset_.has_special_codes() ? UNICHAR_BROKEN : unicharset_.size();
    compressed_.ComputeEncoding(unicharset_, null_char_, &radical_str);
    // Get the encoding of the null char.
    RecodedCharID code;
    compressed_.EncodeUnichar(null_char_, &code);
    encoded_null_char_ = code(0);
    // Dump the encoding as text so that it can be inspected after the run.
    string output_name = file::JoinPath(
        FLAGS_test_tmpdir, absl::StrCat(unicharset_name, ".encoding.txt"));
    STRING encoding = compressed_.GetEncodingAsString(unicharset_);
    string encoding_str(&encoding[0], encoding.size());
    CHECK_OK(file::SetContents(output_name, encoding_str, file::Defaults()));
    LOG(INFO) << "Wrote encoding to:" << output_name;
  }

  // Serializes compressed_ to an in-memory buffer and de-serializes that
  // buffer back over compressed_.
  void SerializeAndUndo() {
    GenericVector<char> data;
    TFile wfp;
    wfp.OpenWrite(&data);
    EXPECT_TRUE(compressed_.Serialize(&wfp));
    TFile rfp;
    rfp.Open(&data[0], data.size());
    EXPECT_TRUE(compressed_.DeSerialize(&rfp));
  }

  // Returns true if the lang is in CJK.
  bool IsCJKLang(const string& lang) {
    return lang == "chi_sim" || lang == "chi_tra" || lang == "kor" ||
           lang == "jpn";
  }

  // Returns true if the lang is Indic.
  bool IsIndicLang(const string& lang) {
    return lang == "asm" || lang == "ben" || lang == "bih" || lang == "hin" ||
           lang == "mar" || lang == "nep" || lang == "san" || lang == "bod" ||
           lang == "dzo" || lang == "guj" || lang == "kan" || lang == "mal" ||
           lang == "ori" || lang == "pan" || lang == "sin" || lang == "tam" ||
           lang == "tel";
  }

  // Expects the appropriate results from the compressed_ unicharset_:
  // every encoded unichar-id round-trips through DecodeUnichar, every code
  // value lies in [0, code_range) and is used in at least one position, the
  // GetNextCodes/GetFinalCodes lists agree with the observed codes, and the
  // code range meets the size bound expected for lang.
  void ExpectCorrect(const string& lang) {
    // Count the number of times each code is used in each element of
    // RecodedCharID.
    RecodedCharID zeros;
    for (int i = 0; i < RecodedCharID::kMaxCodeLen; ++i) zeros.Set(i, 0);
    int code_range = compressed_.code_range();
    std::vector<RecodedCharID> times_seen(code_range, zeros);
    for (int u = 0; u <= unicharset_.size(); ++u) {
      if (u != UNICHAR_SPACE && u != null_char_ &&
          (u == unicharset_.size() || (unicharset_.has_special_codes() &&
                                       u < SPECIAL_UNICHAR_CODES_COUNT))) {
        continue;  // Not used so not encoded.
      }
      RecodedCharID code;
      int len = compressed_.EncodeUnichar(u, &code);
      // Check round-trip encoding.
      int unichar_id;
      if (u == null_char_ || u == unicharset_.size()) {
        unichar_id = null_char_;
      } else {
        unichar_id = u;
      }
      EXPECT_EQ(unichar_id, compressed_.DecodeUnichar(code));
      // Check that the codes are valid.
      for (int i = 0; i < len; ++i) {
        int code_val = code(i);
        EXPECT_GE(code_val, 0);
        EXPECT_LT(code_val, code_range);
        // The expectations above are non-fatal, so skip the tally instead of
        // indexing times_seen out of bounds with an invalid code.
        if (code_val < 0 || code_val >= code_range) continue;
        times_seen[code_val].Set(i, times_seen[code_val](i) + 1);
      }
    }
    // Check that each code is used in at least one position.
    for (int c = 0; c < code_range; ++c) {
      int num_used = 0;
      for (int i = 0; i < RecodedCharID::kMaxCodeLen; ++i) {
        if (times_seen[c](i) != 0) ++num_used;
      }
      EXPECT_GE(num_used, 1) << "c=" << c << "/" << code_range;
    }
    // Check that GetNextCodes/GetFinalCodes lists match the times_seen,
    // and create valid codes.
    RecodedCharID code;
    CheckCodeExtensions(code, times_seen);
    // Finally, we achieved all that using a codebook < 10% of the size of
    // the original unicharset, for CK or Indic, and 20% with J, but just
    // no bigger for all others.
    if (IsCJKLang(lang) || IsIndicLang(lang)) {
      EXPECT_LT(code_range, unicharset_.size() / (lang == "jpn" ? 5 : 10));
    } else {
      EXPECT_LE(code_range, unicharset_.size() + 1);
    }
    LOG(INFO) << "Compressed unicharset of " << unicharset_.size() << " to "
              << code_range;
  }

  // Checks for extensions of the current code that either finish a code, or
  // extend it and checks those extensions recursively.
  // code is the prefix to extend; times_seen[c](i) is the number of times
  // code value c was observed at position i.
  void CheckCodeExtensions(const RecodedCharID& code,
                           const std::vector<RecodedCharID>& times_seen) {
    RecodedCharID extended = code;
    int length = code.length();
    const int num_codes = static_cast<int>(times_seen.size());
    const GenericVector<int>* final_codes = compressed_.GetFinalCodes(code);
    if (final_codes != nullptr) {
      for (int i = 0; i < final_codes->size(); ++i) {
        int ending = (*final_codes)[i];
        EXPECT_GE(ending, 0);
        EXPECT_LT(ending, num_codes);
        // Do not index times_seen with a code that failed the range checks.
        if (ending < 0 || ending >= num_codes) continue;
        EXPECT_GT(times_seen[ending](length), 0);
        extended.Set(length, ending);
        int unichar_id = compressed_.DecodeUnichar(extended);
        EXPECT_NE(INVALID_UNICHAR_ID, unichar_id);
      }
    }
    const GenericVector<int>* next_codes = compressed_.GetNextCodes(code);
    if (next_codes != nullptr) {
      for (int i = 0; i < next_codes->size(); ++i) {
        int extension = (*next_codes)[i];
        EXPECT_GE(extension, 0);
        EXPECT_LT(extension, num_codes);
        // Do not index times_seen with a code that failed the range checks.
        if (extension < 0 || extension >= num_codes) continue;
        EXPECT_GT(times_seen[extension](length), 0);
        extended.Set(length, extension);
        CheckCodeExtensions(extended, times_seen);
      }
    }
  }

  // The compressed encoding computed from unicharset_.
  UnicharCompress compressed_;
  // The unicharset most recently loaded by LoadUnicharset.
  UNICHARSET unicharset_;
  // The unichar-id used as the null char when computing the encoding.
  int null_char_;
  // The encoding of the null_char_.
  int encoded_null_char_;
};
// Runs the full set of encoding checks on the traditional and then the
// simplified Chinese unicharsets.
TEST_F(UnicharcompressTest, DoesChinese) {
  struct ChineseCase {
    const char* log_line;
    const char* unicharset_file;
    const char* lang;
  };
  const ChineseCase kCases[] = {
      {"Testing chi_tra", "chi_tra.unicharset", "chi_tra"},
      {"Testing chi_sim", "chi_sim.unicharset", "chi_sim"},
  };
  for (const ChineseCase& test_case : kCases) {
    LOG(INFO) << test_case.log_line;
    LoadUnicharset(test_case.unicharset_file);
    ExpectCorrect(test_case.lang);
  }
}
// Runs the full set of encoding checks on the Japanese unicharset.
TEST_F(UnicharcompressTest, DoesJapanese) {
  const string unicharset_file = "jpn.unicharset";
  const string lang = "jpn";
  LOG(INFO) << "Testing jpn";
  LoadUnicharset(unicharset_file);
  ExpectCorrect(lang);
}
// Runs the full set of encoding checks on the Korean unicharset.
TEST_F(UnicharcompressTest, DoesKorean) {
  const string unicharset_file = "kor.unicharset";
  const string lang = "kor";
  LOG(INFO) << "Testing kor";
  LoadUnicharset(unicharset_file);
  ExpectCorrect(lang);
}
// Runs the full set of encoding checks on the Kannada unicharset, both on the
// freshly computed encoding and again after a serialize/de-serialize round
// trip.
TEST_F(UnicharcompressTest, DoesKannada) {
  const string unicharset_file = "kan.unicharset";
  const string lang = "kan";
  LOG(INFO) << "Testing kan";
  LoadUnicharset(unicharset_file);
  // Freshly computed encoding.
  ExpectCorrect(lang);
  // The same checks must still hold after the round trip.
  SerializeAndUndo();
  ExpectCorrect(lang);
}
// Runs the full set of encoding checks on the Marathi unicharset.
TEST_F(UnicharcompressTest, DoesMarathi) {
  const string unicharset_file = "mar.unicharset";
  const string lang = "mar";
  LOG(INFO) << "Testing mar";
  LoadUnicharset(unicharset_file);
  ExpectCorrect(lang);
}
// Runs the full set of encoding checks on the English unicharset.
TEST_F(UnicharcompressTest, DoesEnglish) {
  const string unicharset_file = "eng.unicharset";
  const string lang = "eng";
  LOG(INFO) << "Testing eng";
  LoadUnicharset(unicharset_file);
  ExpectCorrect(lang);
}
// Tests that, for a unicharset that contains double-letter ligatures (eg ff),
// no multi-code encoding contains the code of the null char.
TEST_F(UnicharcompressTest, DoesLigaturesWithDoubles) {
  LOG(INFO) << "Testing por with ligatures";
  LoadUnicharset("por.unicharset");
  ExpectCorrect("por");
  // Visit every unichar-id up to and including unicharset_.size().
  const int last_id = unicharset_.size();
  for (int id = 0; id <= last_id; ++id) {
    RecodedCharID encoded;
    const int num_codes = compressed_.EncodeUnichar(id, &encoded);
    // Only encodings made of more than one code are of interest here.
    if (num_codes <= 1) continue;
    // There should not be any null char in the code.
    for (int pos = 0; pos < num_codes; ++pos) {
      EXPECT_NE(encoded_null_char_, encoded(pos));
    }
  }
}
// Tests that GetEncodingAsString returns the right result for a trivial
// unicharset.
TEST_F(UnicharcompressTest, GetEncodingAsString) {
  LoadUnicharset("trivial.unicharset");
  ExpectCorrect("trivial");
  STRING encoding = compressed_.GetEncodingAsString(unicharset_);
  string encoding_str(&encoding[0], encoding.length());
  std::vector<string> lines =
      strings::Split(encoding_str, "\n", strings::SkipEmpty());
  // Fatal assertion: the indexed checks below would read past the end of
  // lines if the line count were wrong.
  ASSERT_EQ(5u, lines.size());
  // The first line is always space.
  EXPECT_EQ("0\t ", lines[0]);
  // Next we have i.
  EXPECT_EQ("1\ti", lines[1]);
  // Next we have f.
  EXPECT_EQ("2\tf", lines[2]);
  // Next we have the fi ligature: fi. There are no nulls in it, as there are no
  // repeated letter ligatures in this unicharset, unlike por.unicharset above.
  EXPECT_EQ("2,1\tfi", lines[3]);
  // Finally the null character.
  EXPECT_EQ("3\t<nul>", lines[4]);
}
} // namespace
} // namespace tesseract
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册