#!/usr/bin/python

import sys, os, re, difflib, unicodedata, errno, cgi
from itertools import *
# One marker character per compared input, by position: ZipDiffer prefixes
# differing lines with symbols[i], and DiffColorizer maps a line's first
# character back to its index with symbols.index ().
diff_symbols = "-+=*&^%$#@!~/"
# Colour names handed to the formatter's start_color (), indexed like diff_symbols.
diff_colors = ['red', 'green', 'blue']

class ColorFormatter:
	"""Namespace of output formatters used to highlight text.

	Each nested class offers the same four static methods:
	start_color (c), end_color (), escape (s) and newline ().
	"""

	class Null:
		"""Plain output: no colour markup and no escaping."""
		@staticmethod
		def start_color (c): return ''
		@staticmethod
		def end_color (): return ''
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class ANSI:
		"""Terminal output using ANSI escape sequences."""
		@staticmethod
		def start_color (c):
			# Raises KeyError for a colour name other than these three.
			return {
				'red': '\033[41;37;1m',
				'green': '\033[42;37;1m',
				'blue': '\033[44;37;1m',
			}[c]
		@staticmethod
		def end_color ():
			return '\033[m'
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class HTML:
		"""HTML output: coloured <span> elements and <br/> line breaks."""
		@staticmethod
		def start_color (c):
			return '<span style="background:%s">' % c
		@staticmethod
		def end_color ():
			return '</span>'
		@staticmethod
		def escape (s):
			# The replacements cgi.escape (s) performed, spelled out because
			# cgi.escape is not available on current Python versions.
			# '&' must be handled first so the other entities stay intact.
			return s.replace ('&', '&amp;').replace ('<', '&lt;').replace ('>', '&gt;')
		@staticmethod
		def newline (): return '<br/>\n'

	@staticmethod
	def Auto (argv = [], out = sys.stdout):
		"""Pick a formatter class from command-line flags.

		Recognised flags are removed from argv in place (first occurrence
		only).  They are checked in the order listed below, so when several
		are present the later one wins.  Without any flag the ANSI formatter
		is returned.  out is accepted but not used.
		"""
		format = ColorFormatter.ANSI
		for flag, formatter in (
			("--format", ColorFormatter.ANSI),
			("--format=ansi", ColorFormatter.ANSI),
			("--format=html", ColorFormatter.HTML),
			("--no-format", ColorFormatter.Null),
		):
			if flag in argv:
				argv.remove (flag)
				format = formatter
		return format
class DiffColorizer:
	"""Highlight the fragments that differ between paired diff lines.

	formatter must provide start_color (c), end_color (), escape (s) and
	newline ().  colors and symbols are indexed by input position.
	"""

	# One match is a (possibly empty) run of word characters followed by at
	# most one non-word character; colorize_lines diffs at this granularity.
	# Both letter cases belong to the word class so that mixed-case words
	# stay single tokens.
	diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

	def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
		self.formatter = formatter
		self.colors = colors
		self.symbols = symbols

	def colorize_lines (self, lines):
		"""Return the two given lines with their differing tokens coloured.

		lines is a pair of strings without their leading diff symbol; a
		None entry is treated as an empty string.  The result is a list
		holding, for each side whose output is non-empty,
		symbol + highlighted text + formatter.newline ().
		"""
		lines = (l if l else '' for l in lines)
		# Put every token on its own line so difflib compares token by token.
		ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
		oo = ["",""]        # output accumulated for each side
		st = [False, False] # whether a colour span is currently open on each side
		for l in difflib.Differ().compare (*ss):
			if l[0] == '?':
				# difflib hint line, carries no content
				continue
			if l[0] == ' ':
				# Token common to both sides: close open spans, append to both.
				for i in range(2):
					if st[i]:
						oo[i] += self.formatter.end_color ()
						st[i] = False
				oo = [o + self.formatter.escape (l[2:]) for o in oo]
				continue
			if l[0] in self.symbols:
				# difflib marks one-sided tokens with '-' or '+'; the symbol
				# table turns that marker into the side index.
				i = self.symbols.index (l[0])
				if not st[i]:
					oo[i] += self.formatter.start_color (self.colors[i])
					st[i] = True
				oo[i] += self.formatter.escape (l[2:])
				continue
		for i in range(2):
			if st[i]:
				oo[i] += self.formatter.end_color ()
				st[i] = False
		# Drop the per-token newlines inserted above.
		oo = [o.replace ('\n', '') for o in oo]
		return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]

	def colorize_diff (self, f):
		"""Yield the lines of diff f with differing fragments highlighted.

		A line whose first character is not in symbols is yielded escaped,
		with '\\n' replaced by formatter.newline ().  Other lines are stored
		(minus the symbol) in the slot for their symbol; the stored pair is
		passed through colorize_lines when both slots are filled, when a
		filled slot is about to be overwritten, and at end of input.
		"""
		lines = [None, None]
		for l in f:
			if l[0] not in self.symbols:
				yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
				continue
			i = self.symbols.index (l[0])
			if lines[i]:
				# Flush
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
			lines[i] = l[1:]
			if (all (lines)):
				# Flush
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
		if (any (lines)):
			# Flush
			for line in self.colorize_lines (lines):
				yield line

class ZipDiffer:
	"""Line-by-line comparison of several inputs read in lockstep."""

	@staticmethod
	def diff_files (files, symbols=diff_symbols):
		"""Write a positional diff of files to standard output.

		files is an iterable of line iterables.  The inputs are read in
		parallel: when all of them have the same line at a position, it is
		written once prefixed with a space; otherwise each input's line (if
		it has one) is written prefixed with symbols[i] for input i.

		An IOError other than EPIPE is reported on standard error and ends
		the process with status 1; EPIPE is ignored silently.
		"""
		try:
			from itertools import izip_longest # Python 2
		except ImportError:
			from itertools import zip_longest as izip_longest # Python 3

		files = tuple (files) # in case it's a generator, copy it
		try:
			for lines in izip_longest (*files):
				if all (lines[0] == line for line in lines[1:]):
					sys.stdout.writelines ([" ", lines[0]])
					continue

				for i, l in enumerate (lines):
					# Exhausted inputs are padded with None; skip them.
					if l:
						sys.stdout.writelines ([symbols[i], l])
		except IOError as e:
			if e.errno != errno.EPIPE:
				sys.stderr.write ("%s: %s: %s\n" % (sys.argv[0], e.filename, e.strerror))
				sys.exit (1)
class DiffFilters:
	"""Generators that filter the lines of a diff."""

	@staticmethod
	def filter_failures (f):
		"""Yield only the lines belonging to test cases that did not pass.

		f is grouped with DiffHelpers.separate_test_cases; a group is kept
		when DiffHelpers.test_passed is false for it.
		"""
		for key, lines in DiffHelpers.separate_test_cases (f):
			# The group is a one-shot iterator; keep a copy to re-iterate.
			lines = list (lines)
			if not DiffHelpers.test_passed (lines):
				for l in lines: yield l
class Stat:
	"""Running tally: how many tests were added and their summed freq."""

	def __init__ (self):
		self.count = self.freq = 0

	def add (self, test):
		"""Count one more test and accumulate its freq attribute."""
		self.count = self.count + 1
		self.freq = self.freq + test.freq

class Stats:
	"""Pass/fail tallies for a set of tests, with simple statistics.

	mean, variance and stddev treat each test as a pass/fail trial; they
	raise ZeroDivisionError when no test has been added yet.
	"""

	def __init__ (self):
		self.passed = Stat ()
		self.failed = Stat ()
		self.total  = Stat ()

	def add (self, test):
		"""Record test in the total and in passed or failed per test.passed."""
		self.total.add (test)
		if test.passed:
			self.passed.add (test)
		else:
			self.failed.add (test)

	def mean (self):
		"""Return the fraction of tests that passed."""
		return float (self.passed.count) / self.total.count

	def variance (self):
		"""Return (pass fraction) * (fail fraction)."""
		return (float (self.passed.count) / self.total.count) * \
		       (float (self.failed.count) / self.total.count)

	def stddev (self):
		"""Return the square root of variance ()."""
		return self.variance () ** .5

	def zscore (self, population):
		"""Calculate the standard score.
		   Population is the Stats for population.
		   Self is Stats for sample.
		   Returns larger absolute value if sample is highly unlikely to be random.
		   Anything outside of -3..+3 is very unlikely to be random.
		   Returns 0.0 when the population has no spread (all of its tests
		   passed, or all failed), as no deviation can be measured then.
		   See: http://en.wikipedia.org/wiki/Standard_score"""

		spread = population.stddev ()
		if not spread:
			# Avoid dividing by zero for a uniform population.
			return 0.
		return (self.mean () - population.mean ()) / spread




class DiffSinks:
	"""Consumers that read a whole diff and print a summary."""

	@staticmethod
	def print_stat (f):
		"""Print how many test cases in diff f passed and failed."""
		passed = 0
		failed = 0
		# XXX port to Stats, but that would really slow us down here
		for key, lines in DiffHelpers.separate_test_cases (f):
			if DiffHelpers.test_passed (lines):
				passed += 1
			else:
				failed += 1
		total = passed + failed
		# Empty input contains no tests; report 0% instead of dividing by zero.
		percent = 100. * failed / total if total else 0.
		print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))

	@staticmethod
	def print_ngrams (f, ns=(1,2,3)):
		"""Print a z-score line for code point n-grams seen in failing tests.

		Every test case in diff f is parsed with Test; for each n in ns, the
		n-grams of its code points are tallied in a Stats.  Only n-grams
		occurring in at least 30 failed tests are printed.
		"""
		gens = tuple (Ngram.generator (n) for n in ns)
		allstats = Stats ()
		allgrams = {}
		for key, lines in DiffHelpers.separate_test_cases (f):
			test = Test (lines)
			allstats.add (test)

			for gen in gens:
				# unicodes stays None when the test has no <...> line.
				for ngram in gen (test.unicodes or ()):
					if ngram not in allgrams:
						allgrams[ngram] = Stats ()
					allgrams[ngram].add (test)

		importantgrams = {}
		for ngram, stats in allgrams.items ():
			if stats.failed.count >= 30: # for statistical reasons
				importantgrams[ngram] = stats
		allgrams = importantgrams
		del importantgrams

		for ngram, stats in allgrams.items ():
			print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))



class Test:
	"""One test case parsed from its diff lines.

	Each line starts with a diff symbol, optionally followed by
	"identifier: ", then a payload wrapped in brackets: (...) is stored in
	text, <...> is parsed into unicodes, [...] is stored in glyphs.
	passed is False as soon as any line's symbol is not a space.
	"""

	def __init__ (self, lines):
		self.freq = 1
		self.passed = True
		self.identifier = None
		self.text = None
		self.unicodes = None
		self.glyphs = None
		for l in lines:
			if not l:
				continue
			symbol = l[0]
			if symbol != ' ':
				self.passed = False
			i = 1
			if ':' in l:
				i = l.index (':')
				if not self.identifier:
					self.identifier = l[1:i]
				i = i + 2 # Skip colon and space
			# j indexes the closing bracket: the last character, or the one
			# before a trailing newline.
			j = -1
			if l[j] == '\n':
				j -= 1
			if i >= len (l) + j:
				# Too short to hold an opening and a closing bracket.
				continue
			brackets = l[i] + l[j]
			# Slice up to j rather than a fixed -2 so the payload is intact
			# whether or not the line ends with a newline.
			l = l[i+1:j]
			if brackets == '()':
				self.text = l
			elif brackets == '<>':
				self.unicodes = Unicode.parse (l)
			elif brackets == '[]':
				# XXX we don't handle failed tests here
				self.glyphs = l


class DiffHelpers:
	"""Helpers for grouping and judging diff lines."""

	@staticmethod
	def separate_test_cases (f):
		'''Reads lines from f, and if the lines have identifiers, ie.
		   have a colon character, groups them by identifier,
		   yielding lists of all lines with the same identifier.

		   Only consecutive lines are grouped.  Each result is a
		   (key, lines) pair where lines is a one-shot iterator; a line
		   without identifier is its own key.'''

		def identifier (l):
			if ':' in l[1:]:
				# Search from index 1, like the test above, so the leading
				# diff symbol is never taken for the separator.
				return l[1:l.index (':', 1)]
			return l
		return groupby (f, key=identifier)

	@staticmethod
	def test_passed (lines):
		'''Returns True if every line starts with a space, ie. none of
		   them carries a difference marker.'''
		return all (l[0] == ' ' for l in lines)

class FilterHelpers:
	"""Adapters that turn a line filter into a function printing its output."""

	@staticmethod
	def filter_printer_function (filter_callback):
		"""Return printer (f) that prints each item of filter_callback (f),
		adding a newline after every item."""
		def printer (f):
			for line in filter_callback (f):
				print (line)
		return printer

	@staticmethod
	def filter_printer_function_no_newline (filter_callback):
		"""Return printer (f) that writes each item of filter_callback (f)
		to standard output exactly as produced, adding nothing."""
		def printer (f):
			for line in filter_callback (f):
				sys.stdout.writelines ([line])
		return printer


class Ngram:
	"""Factory for sliding-window n-gram generators."""

	@staticmethod
	def generator (n):
		"""Return a generator function producing the n-grams of an iterable.

		The returned function yields each run of n consecutive items as a
		tuple, and carries n in its "n" attribute.
		"""

		def gen (f):
			window = []
			for item in f:
				window.append (item)
				if len (window) != n:
					continue
				yield tuple (window)
				# Slide forward by dropping the oldest item.
				del window[0]

		gen.n = n
		return gen


class UtilMains:
	"""Command-line drivers that feed sys.argv entries to a callback.

	In all of them an IOError other than EPIPE is reported on standard
	error and ends the process with status 1; EPIPE is ignored silently.
	"""

	@staticmethod
	def _report_io_error (e):
		"""Report IOError e and exit with status 1, unless it is EPIPE."""
		if e.errno != errno.EPIPE:
			sys.stderr.write ("%s: %s: %s\n" % (sys.argv[0], e.filename, e.strerror))
			sys.exit (1)

	@staticmethod
	def process_multiple_files (callback, mnemonic = "FILE"):
		"""Call callback with an open file for every command-line argument.

		'-' stands for standard input, which is also used when no argument
		is given.  With --help among the arguments, a usage line is printed
		and the process exits with status 1.
		"""

		if "--help" in sys.argv:
			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
			sys.exit (1)

		try:
			files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
			for s in files:
				callback (FileHelpers.open_file_or_stdin (s))
		except IOError as e:
			UtilMains._report_io_error (e)

	@staticmethod
	def process_multiple_args (callback, mnemonic):
		"""Call callback with every command-line argument, in order.

		Without arguments, a usage line is printed and the process exits
		with status 1.
		"""

		if len (sys.argv) == 1:
			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
			sys.exit (1)

		try:
			for s in sys.argv[1:]:
				callback (s)
		except IOError as e:
			UtilMains._report_io_error (e)

	@staticmethod
	def filter_multiple_strings_or_stdin (callback, mnemonic, \
					      separator = " ", \
					      concat_separator = False):
		"""Print callback applied to the arguments or to standard input lines.

		With --stdin as the only argument, each line of standard input is
		passed to callback without its trailing newline and the result is
		printed on its own line.  Otherwise callback is applied to every
		argument and the results are printed joined by separator; when
		concat_separator is not False, the arguments are first joined with
		it into a single string.  Any other use of --stdin, or no argument
		at all, prints usage and exits with status 1.
		"""

		if len (sys.argv) == 1 or ('--stdin' in sys.argv and len (sys.argv) != 2):
			print ("Usage:\n  %s %s...\nor:\n  %s --stdin" \
			       % (sys.argv[0], mnemonic, sys.argv[0]))
			sys.exit (1)

		try:
			if '--stdin' in sys.argv:
				sys.argv.remove ('--stdin')
				while (1):
					line = sys.stdin.readline ()
					if not len (line):
						break
					if line[-1] == '\n':
						line = line[:-1]
					print (callback (line))
			else:
				args = sys.argv[1:]
				if concat_separator != False:
					args = [concat_separator.join (args)]
				print (separator.join (callback (x) for x in (args)))
		except IOError as e:
			UtilMains._report_io_error (e)


class Unicode:
	"""Conversions between text and hexadecimal code point notation."""

	@staticmethod
	def _chr (code):
		"""Return the one-character text string for code point code."""
		# The builtin is named unichr on Python 2 and chr on Python 3.
		return unichr (code) if sys.version_info[0] == 2 else chr (code)

	@staticmethod
	def decode (s):
		"""Return the characters of s as "<U+XXXX,U+XXXX,...>".

		s may be UTF-8 encoded bytes or an already decoded text string.
		"""
		if isinstance (s, bytes):
			s = s.decode ('utf-8')
		return '<' + ','.join ("U+%04X" % ord (u) for u in s) + '>'

	@staticmethod
	def parse (s):
		"""Return the list of integers named by the hexadecimal numbers in s.

		The characters < + > , \\ u U, newlines, tabs and "0x"/"0X" prefixes
		are treated as separators, so "<U+0041,U+0042>", "\\u0041 \\u0042"
		and "0x41 0x42" all give [0x41, 0x42].
		"""
		s = re.sub (r"[<+>,\\uU\n\t]", " ", s)
		s = re.sub (r"0[xX]", " ", s)
		return [int (x, 16) for x in s.split (' ') if len (x)]

	@staticmethod
	def encode (s):
		"""Return the text made of the code points listed in s (see parse).

		The result is a native str: UTF-8 encoded on Python 2, text on
		Python 3.
		"""
		text = u''.join (Unicode._chr (x) for x in Unicode.parse (s))
		return text.encode ('utf-8') if sys.version_info[0] == 2 else text

	# Abbreviations substituted by pretty_name for long character names.
	shorthands = {
		"ZERO WIDTH NON-JOINER": "ZWNJ",
		"ZERO WIDTH JOINER": "ZWJ",
		"NARROW NO-BREAK SPACE": "NNBSP",
		"COMBINING GRAPHEME JOINER": "CGJ",
		"LEFT-TO-RIGHT MARK": "LRM",
		"RIGHT-TO-LEFT MARK": "RLM",
		"LEFT-TO-RIGHT EMBEDDING": "LRE",
		"RIGHT-TO-LEFT EMBEDDING": "RLE",
		"POP DIRECTIONAL FORMATTING": "PDF",
		"LEFT-TO-RIGHT OVERRIDE": "LRO",
		"RIGHT-TO-LEFT OVERRIDE": "RLO",
	}

	@staticmethod
	def pretty_name (u):
		"""Return a shortened form of the Unicode name of character u.

		Everything up to " LETTER ", " SIGN " or " COMBINING " is dropped,
		"... VOWEL SIGN X" becomes "X-MATRA", and names listed in
		shorthands are abbreviated.  Returns "XXX" for a character that has
		no name.
		"""
		try:
			s = unicodedata.name (u)
		except ValueError:
			return "XXX"
		s = re.sub (".* LETTER ", "", s)
		s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
		s = re.sub (".* SIGN ", "", s)
		s = re.sub (".* COMBINING ", "", s)
		# NOTE(review): a name already reduced to plain "VIRAMA" by the SIGN
		# rule above does not match this pattern (it needs a preceding
		# space) -- confirm whether that is intended.
		if re.match (".* VIRAMA", s):
			s = "HALANT"
		if s in Unicode.shorthands:
			s = Unicode.shorthands[s]
		return s

	@staticmethod
	def pretty_names (s):
		"""Return the pretty_name of every code point in s, joined by " + ".

		s lists hexadecimal code points separated by commas, spaces or
		newlines, with optional <, +, >, \\, u, U or 0x decoration.
		"""
		s = re.sub (r"[<+>\\uU]", " ", s)
		s = re.sub (r"0[xX]", " ", s)
		chars = [Unicode._chr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
		return ' + '.join (Unicode.pretty_name (x) for x in chars)
class FileHelpers:
	"""Helpers for opening input files."""

	@staticmethod
	def open_file_or_stdin (f):
		"""Return sys.stdin if f is '-', otherwise file f opened for reading.

		Closing the returned file is left to the caller.
		"""
		if f == '-':
			return sys.stdin
		# open () rather than the file () builtin, which only Python 2 has.
		return open (f)

class Manifest:
	"""Reading and generating per-directory MANIFEST files.

	A MANIFEST lists, one name per line, the entries of its directory.
	"""

	@staticmethod
	def read (s, strict = True):
		"""Yield the normalized paths of all files reachable from s.

		A file yields its own path.  A directory is expanded through the
		entries of its MANIFEST, recursively; blank lines are ignored.
		If s, or the MANIFEST of directory s, is missing, an error is
		written to standard error and the process exits with status 1 when
		strict is true; otherwise nothing is yielded for s.  Nested entries
		are always read with the default strict setting.
		"""

		if not os.path.exists (s):
			if strict:
				sys.stderr.write ("%s: %s does not exist\n" % (sys.argv[0], s))
				sys.exit (1)
			return

		s = os.path.normpath (s)

		if os.path.isdir (s):

			try:
				with open (os.path.join (s, "MANIFEST")) as m:
					items = [x.strip () for x in m]
			except IOError:
				if strict:
					sys.stderr.write ("%s: %s does not exist\n" % (sys.argv[0], os.path.join (s, "MANIFEST")))
					sys.exit (1)
				return
			for f in items:
				if not f:
					# A blank entry would resolve back to s itself and
					# recurse without end.
					continue
				for p in Manifest.read (os.path.join (s, f)):
					yield p
		else:
			yield s

	@staticmethod
	def update_recursive (s):
		"""(Re)write the MANIFEST of s and of every directory below it.

		Each MANIFEST lists the directory's files, then its subdirectories,
		both sorted.  Entries named MANIFEST, README, LICENSE, COPYING,
		AUTHORS, SOURCES or ChangeLog are left out, and directories with
		those names are not descended into.  A "GEN" line is printed for
		every MANIFEST written.
		"""

		# os.walk already descends into whatever remains in dirnames, so
		# each directory is handled exactly once without recursing here.
		for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

			for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
				if f in dirnames:
					dirnames.remove (f)
				if f in filenames:
					filenames.remove (f)
			# Sorting in place also fixes the order in which os.walk descends.
			dirnames.sort ()
			filenames.sort ()
			ms = os.path.join (dirpath, "MANIFEST")
			print ("  GEN    %s" % ms)
			with open (ms, "w") as m:
				for f in filenames + dirnames:
					m.write (f + "\n")

# Running this module directly is a no-op.
if __name__ == '__main__':
	pass