gen-indic-table.py 4.7 KB
Newer Older
B
Behdad Esfahbod 已提交
1 2 3 4
#!/usr/bin/python

import sys

# Exactly three data files must be named on the command line.
if len (sys.argv[1:]) != 3:
	sys.stderr.write ("usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt\n")
	sys.exit (1)

# Inputs are opened in command-line order; the rest of the script addresses
# them by position (index 2 is the one whose records fill `blocks`).
files = [open (path) for path in sys.argv[1:]]

# Keep the first two lines of every input file; they are echoed into the
# comment at the top of the generated output.
headers = [[f.readline () for i in range (2)] for f in files]

blocks = {}			# property value of file 2 -> (first, last) code point
data = [{} for f in files]	# per file: code point -> property value
values = [{} for f in files]	# per file: property value -> number of code points
for i, f in enumerate (files):
	for line in f:

		# Strip a trailing '#' comment, if any.
		j = line.find ('#')
		if j >= 0:
			line = line[:j]

		# Records are ';'-separated; a line without any ';' (blank or
		# comment-only) yields a single field and is skipped.
		fields = [x.strip () for x in line.split (';')]
		if len (fields) == 1:
			continue

		# First field is either one hex code point or "START..END".
		uu = fields[0].split ('..')
		start = int (uu[0], 16)
		if len (uu) == 1:
			end = start
		else:
			end = int (uu[1], 16)

		t = fields[1]

		for u in range (start, end + 1):
			data[i][u] = t
		# Count code points rather than input records: this total is
		# later printed as "N chars" beside each generated #define, and
		# a single record may cover a whole range.
		values[i][t] = values[i].get (t, 0) + end - start + 1

		if i == 2:
			blocks[t] = (start, end)

# Fold the three per-file tables into one: code point -> [value0, value1, value2].
defaults = ('Other', 'Not_Applicable', 'No_Block')
# Each default value is given one extra count in its file's tally.
for idx in range (len (defaults)):
	name = defaults[idx]
	values[idx][name] = 1 + values[idx].get (name, 0)

merged = {}
for idx, table in enumerate (data):
	for cp, prop in table.items ():
		if cp not in merged:
			# The third table only annotates code points that one of
			# the first two tables already introduced.
			if idx == 2:
				continue
			merged[cp] = list (defaults)
		merged[cp][idx] = prop
data = merged
del merged
num = len (data)

# NO-BREAK SPACE and DOTTED CIRCLE are taken out of the main table and kept
# aside in `singles`.
singles = {}
for cp in (0x00A0, 0x25CC):
	singles[cp] = data.pop (cp)

# Opening comment of the generated file: how it was produced, followed by
# the header lines captured from each input file.
banner = [
	"/* == Start of generated table == */",
	"/*",
	" * The following table is generated by running:",
	" *",
	" *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt",
	" *",
	" * on files with these headers:",
	" *",
]
for text in banner:
	print (text)
for per_file in headers:
	for raw in per_file:
		print (" * %s" % (raw.strip()))
print (" */")
print ("")
print ('#include "hb-ot-shape-complex-indic-private.hh"')
print ("")

# Hand-picked short aliases, one dict per category kind (index 0 and 1).
# Values not listed here get an alias derived from their capital letters.
short = [
	{
		"Bindu": 'Bi',
		"Visarga": 'Vs',
		"Vowel": 'Vo',
		"Vowel_Dependent": 'M',
		"Other": 'x',
	},
	{
		"Not_Applicable": 'x',
	},
]

# Aliases already taken, per category kind; used to detect collisions.
all_shorts = [list (table.values ()) for table in short]

what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]
for kind in range (2):
	print ("")
	for value in sorted (values[kind].keys ()):
		if value not in short[kind]:
			# Derived alias: the upper-case letters of the value name,
			# with "_And_" collapsed so that the 'A' of "And" is dropped.
			squeezed = value.replace ('_And_', '_')
			alias = ''.join ([c for c in squeezed if 'A' <= c <= 'Z'])
			if alias in all_shorts[kind]:
				raise Exception ("Duplicate short value alias", value, alias)
			all_shorts[kind].append (alias)
			short[kind][value] = alias
		alias = short[kind][value]
		# Tabs that pad the trailing comment; shrinks as the name grows.
		padding = '\t' * ((48-1 - len (what[kind]) - 1 - len (value)) // 8)
		print ("#define %s_%s\t%s_%s\t%s/* %3d chars; %s */" %
			(what_short[kind], alias, what[kind], value.upper (),
			padding, values[kind][value], value))
print ("")
print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)")
print ("")
print ("")

# Running totals across all print_block calls: table slots emitted, and how
# many of those slots correspond to code points present in the data.
total = 0
used = 0
def print_block (block, start, end, data):
	"""Write the table cells for code points start..end (inclusive).

	Emits a comment naming `block` and its range, then one "_(S,M),"
	cell per code point, right-aligned in nine columns.  A new output
	line, prefixed with the code point in a comment, is started at every
	code point that is a multiple of 8.  Code points missing from `data`
	are written with the module-level `defaults`.

	Adds the range size to the global `total` and the number of code
	points found in `data` to the global `used`.
	"""
	global total, used

	print ("")
	print ("")
	print ("  /* %s  (%04X..%04X) */" % (block, start, end))
	populated = 0
	for cp in range (start, end+1):
		if cp % 8 == 0:
			print ("")
			# No newline here: the cells of this row follow directly.
			sys.stdout.write ("  /* %04X */" % cp)
		if cp in data:
			populated += 1
		entry = data.get (cp, defaults)
		cell = "_(%s,%s)," % (short[0][entry[0]], short[1][entry[1]])
		sys.stdout.write ("%9s" % cell)

	total += end - start + 1
	used += populated

# Walk the code points in ascending order and emit the table block by block.
# Blocks separated by a small gap are joined with filler cells; a larger gap
# starts a new contiguous range with its own offset #define.
uu = sorted (data.keys ())

last = -1	# last code point emitted so far
num = 0		# not read again in the visible script
offset = 0	# table index at which the current range begins
starts = []	# first code point of each range
ends = []	# one past the last code point of each range
print ("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {")
for u in uu:
	# Already covered by a block that was emitted earlier.
	if u <= last:
		continue
	block = data[u][2]
	start, end = blocks[block]

	gap = start - (last + 1)
	if gap != 0:
		if gap <= 32:
			# Close enough: pad up to the block instead of splitting.
			print_block ("FILLER", last+1, start-1, data)
			last = start-1
		else:
			# Close the previous range, if there is one, and open a new one.
			if last >= 0:
				range_end = last + 1
				ends.append (range_end)
				offset += range_end - starts[-1]
			print ("")
			print ("")
			print ("#define indic_offset_0x%04x %d" % (start, offset))
			starts.append (start)

	print_block (block, start, end, data)
	last = end

# Close the final range.
range_end = last + 1
ends.append (range_end)
offset += range_end - starts[-1]
print ("")
print ("")
print ("#define indic_offset_total %d" % offset)
print ("")
# Percentage of emitted cells that correspond to code points in the data.
occupancy = used * 100. / total
print ("}; /* Table occupancy: %d%% */" % occupancy)

# Emit the C lookup function: one range test per contiguous table range,
# then the code points kept aside in `singles`, then the default.
print ("")
print ("INDIC_TABLE_ELEMENT_TYPE")
print ("hb_indic_get_categories (hb_codepoint_t u)")
print ("{")
for (start,end) in zip (starts, ends):
	offset_macro = "indic_offset_0x%04x" % start
	# `ends` holds one-past-the-last code points (it is filled with
	# `last + 1`), while the generated test is inclusive; emitting `end`
	# itself would let u == end index one element past the range, which
	# for the final range is past the end of indic_table.
	print ("  if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end - 1, start, offset_macro))
# Sorted so the generated output does not depend on dict ordering.
for u,d in sorted (singles.items ()):
	print ("  if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
print ("  return _(x,x);")
print ("}")
print ("")
# Undefine every helper macro that was defined for the table body.
print ("#undef _")
for i in range (2):
	print ("")
	for v in sorted (values[i].keys ()):
		print ("#undef %s_%s" % (what_short[i], short[i][v]))
print ("")
print ("/* == End of generated table == */")

# Maintain at least 30% occupancy in the table: `occupancy` is the percentage
# of emitted cells backed by a code point from the input data, so a lower
# figure means the table is mostly filler.  Fail the run in that case.
if occupancy < 30:
	raise Exception ("Table too sparse, please investigate: ", occupancy)