#!/usr/bin/python

import sys

# Exactly three data files are expected, in this order: the syllabic-category
# file, the matra-category file and the blocks file.  The rest of the script
# identifies them by position (index 2 is treated as the blocks file).
if len (sys.argv) != 4:
	print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
	sys.exit (1)

# open() is the documented way to open a file; file() is the type itself.
files = [open (x) for x in sys.argv[1:]]

# Keep the first two lines of every input file; they are echoed into the
# banner of the generated output.
headers = [[f.readline () for i in range (2)] for f in files]

# blocks:    property value of the third file -> (first, last) code point of
#            the most recent line carrying it.
# data[i]:   code point -> property value found in file i.
# values[i]: property value -> number of code points carrying it in file i.
blocks = {}
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate (files):
	for line in f:

		# Drop the trailing comment, if any.
		j = line.find ('#')
		if j >= 0:
			line = line[:j]

		# A line without any ';' separator (blank, or comment only) has no data.
		fields = [x.strip () for x in line.split (';')]
		if len (fields) == 1:
			continue

		# First field: one code point or a "START..END" range, in hex.
		uu = fields[0].split ('..')
		start = int (uu[0], 16)
		if len (uu) == 1:
			end = start
		else:
			end = int (uu[1], 16)

		t = fields[1]

		for u in range (start, end + 1):
			data[i][u] = t
		# Tally code points, not input lines: a range line covers
		# end - start + 1 characters, and the tally is printed as "chars".
		values[i][t] = values[i].get (t, 0) + end - start + 1

		if i == 2:
			blocks[t] = (start, end)

# Every input has been read to the end; release the file handles.
for f in files:
	f.close ()

# Fold the per-file tables into a single map whose values are three-item
# lists, one slot per input file, pre-filled with the defaults below.
defaults = ('Other', 'Not_Applicable', 'No_Block')
# Each default value gets one extra count in its tally (creating the tally
# entry if no input line mentioned the value).
for idx, fallback in enumerate (defaults):
	values[idx][fallback] = 1 + values[idx].get (fallback, 0)
merged = {}
for idx, table in enumerate (data):
	for cp, prop in table.items ():
		if cp not in merged:
			# The third file never creates an entry of its own; it only
			# fills in code points the first two files already listed.
			if idx == 2:
				continue
			merged[cp] = list (defaults)
		merged[cp][idx] = prop
data = merged
del merged
num = len (data)

# Take the outliers NO-BREAK SPACE (U+00A0) and DOTTED CIRCLE (U+25CC) out of
# the main table and keep their entries aside in `singles`.
singles = {}
for outlier in (0x00A0, 0x25CC):
	singles[outlier] = data.pop (outlier)

print "/* == Start of generated table == */"
print "/*"
print " * The following table is generated by running:"
print " *"
print " *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
print " *"
print " * on files with these headers:"
print " *"
for h in headers:
	for l in h:
		print " * %s" % (l.strip())
print " */"
B
Behdad Esfahbod 已提交
77
print
B
Behdad Esfahbod 已提交
78
print '#include "hb-ot-shape-complex-indic-private.hh"'
B
Behdad Esfahbod 已提交
79
print
B
Behdad Esfahbod 已提交
80 81 82 83 84 85

# Shorten values
# short[0] maps values of the first input file to their abbreviations,
# short[1] does the same for the second input file.  Only some of the values
# are listed here, to make them more readable and to avoid duplicates; the
# remaining abbreviations are derived when the #define lines are printed.
short = [{
	"Bindu":		'Bi',
	"Visarga":		'Vs',
	"Vowel":		'Vo',
	"Vowel_Dependent":	'M',
	"Other":		'x',
},{
	"Not_Applicable":	'x',
}]

# all_shorts[i] lists every abbreviation already taken in short[i]; it is
# consulted to reject clashes when further abbreviations are derived.
all_shorts = [list (d.values ()) for d in short]

what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]
for i in range (2):
	print
	vv = values[i].keys ()
	vv.sort ()
	for v in vv:
		v_no_and = v.replace ('_And_', '_')
		if v in short[i]:
			s = short[i][v]
		else:
			s = ''.join ([c for c in v_no_and if ord ('A') <= ord (c) <= ord ('Z')])
			if s in all_shorts[i]:
				raise Exception ("Duplicate short value alias", v, s)
			all_shorts[i].append (s)
			short[i][v] = s
		print "#define %s_%s	%s_%s	%s/* %3d chars; %s */" % \
			(what_short[i], s, what[i], v.upper (), \
			'	'* ((48-1 - len (what[i]) - 1 - len (v)) / 8), \
			values[i][v], v)
print
print "#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)"
print
print

# Running state updated by print_block (): number of table slots printed,
# number of those slots backed by real data, and the block name most recently
# passed in.
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
129 130 131 132 133
	global total, used, last_block
	if block and block != last_block:
		print
		print
		print "  /* %s */" % block
B
Behdad Esfahbod 已提交
134
	num = 0
135 136
	assert start % 8 == 0
	assert (end+1) % 8 == 0
B
Behdad Esfahbod 已提交
137 138 139 140 141 142 143
	for u in range (start, end+1):
		if u % 8 == 0:
			print
			print "  /* %04X */" % u,
		if u in data:
			num += 1
		d = data.get (u, defaults)
B
Behdad Esfahbod 已提交
144
		sys.stdout.write ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))
B
Behdad Esfahbod 已提交
145

146 147
	total += end - start + 1
	used += num
148 149
	if block:
		last_block = block
B
Behdad Esfahbod 已提交
150 151 152 153 154 155

uu = data.keys ()
uu.sort ()

last = -1
num = 0
156 157 158 159
offset = 0
starts = []
ends = []
print "static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {"
B
Behdad Esfahbod 已提交
160 161 162 163
for u in uu:
	if u <= last:
		continue
	block = data[u][2]
164 165 166 167 168 169 170
	(bstart, bend) = blocks[block]

	start = u//8*8
	end = start+1
	while end < bend and end in uu:
		end += 1
	end = (end-1)//8*8 + 7
B
Behdad Esfahbod 已提交
171 172

	if start != last + 1:
173 174
		if start - last <= 1+16*3:
			print_block (None, last+1, start-1, data)
B
Behdad Esfahbod 已提交
175 176 177
			last = start-1
		else:
			if last >= 0:
178 179 180 181 182 183
				ends.append (last + 1)
				offset += ends[-1] - starts[-1]
			print
			print
			print "#define indic_offset_0x%04x %d" % (start, offset)
			starts.append (start)
B
Behdad Esfahbod 已提交
184 185 186

	print_block (block, start, end, data)
	last = end
187 188
ends.append (last + 1)
offset += ends[-1] - starts[-1]
B
Behdad Esfahbod 已提交
189 190
print
print
191 192
print "#define indic_offset_total %d" % offset
print
193 194
occupancy = used * 100. / total
print "}; /* Table occupancy: %d%% */" % occupancy
B
Behdad Esfahbod 已提交
195
print
B
Behdad Esfahbod 已提交
196 197
print "INDIC_TABLE_ELEMENT_TYPE"
print "hb_indic_get_categories (hb_codepoint_t u)"
B
Behdad Esfahbod 已提交
198
print "{"
199 200 201
for (start,end) in zip (starts, ends):
	offset = "indic_offset_0x%04x" % start
	print "  if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end, start, offset)
B
Behdad Esfahbod 已提交
202 203
for u,d in singles.items ():
	print "  if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])
B
Behdad Esfahbod 已提交
204
print "  return _(x,x);"
B
Behdad Esfahbod 已提交
205 206 207 208 209 210 211 212 213 214 215 216
print "}"
print
print "#undef _"
for i in range (2):
	print
	vv = values[i].keys ()
	vv.sort ()
	for v in vv:
		print "#undef %s_%s" % \
			(what_short[i], short[i][v])
print
print "/* == End of generated table == */"
217 218 219 220

# Maintain at least 30% occupancy in the table */
if occupancy < 30:
	raise Exception ("Table too sparse, please investigate: ", occupancy)