gen-indic-table.py 4.8 KB
Newer Older
B
Behdad Esfahbod 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#!/usr/bin/python

import sys

# The three Unicode data files must be given on the command line, in this order.
if len (sys.argv) < 4:
	sys.stderr.write ("usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt\n")
	sys.exit (1)

# open() instead of the Python-2-only file() builtin.
files = [open (sys.argv[i+1]) for i in range (3)]

try:
	# The first two lines of every file are kept; they are echoed later in the
	# banner comment of the generated table.
	headers = [[f.readline () for i in range (2)] for f in files]

	# blocks: block name -> (first, last) codepoint, filled from the third file.
	# data[i]: codepoint -> property value read from file i.
	# values[i]: property value -> number of codepoints carrying it in file i.
	blocks = {}
	data = [{} for f in files]
	values = [{} for f in files]
	for i, f in enumerate (files):
		for line in f:

			# Strip the trailing '#' comment, if any.
			j = line.find ('#')
			if j >= 0:
				line = line[:j]

			fields = [x.strip () for x in line.split (';')]
			if len (fields) == 1:
				# No ';' separator: blank or comment-only line.
				continue

			# First field is a single codepoint "XXXX" or an inclusive
			# range "XXXX..YYYY", in hexadecimal.
			uu = fields[0].split ('..')
			start = int (uu[0], 16)
			if len (uu) == 1:
				end = start
			else:
				end = int (uu[1], 16)

			t = fields[1]

			for u in range (start, end + 1):
				data[i][u] = t
			# Count codepoints rather than input lines: the total is printed
			# as "N chars" next to each generated #define.
			values[i][t] = values[i].get (t, 0) + end - start + 1

			if i == 2:
				blocks[t] = (start, end)
finally:
	# Everything needed is in memory now; release the file handles.
	for f in files:
		f.close ()

# Fold the three per-file maps into one dict:
#   codepoint -> [syllabic category, matra category, block name]
defaults = ('Other', 'Not_Applicable', 'No_Block')
# Count each default once so that it is always present as a key in values[].
for idx, name in enumerate (defaults):
	values[idx][name] = 1 + values[idx].get (name, 0)
merged = {}
for idx, table in enumerate (data):
	for cp, value in table.items ():
		if cp not in merged:
			# The third file (index 2) only annotates codepoints that one of
			# the first two files already mentioned; it never adds new ones.
			if idx == 2:
				continue
			merged[cp] = list (defaults)
		merged[cp][idx] = value
data = merged
del merged
num = len (data)

# NO-BREAK SPACE and DOTTED CIRCLE are outliers: take them out of the main
# table data and remember them separately.
singles = {}
for cp in (0x00A0, 0x25CC):
	singles[cp] = data.pop (cp)

# Emit the banner comment (including the header lines of the input files) and
# open the include guard.  The text is assembled first and written in one go
# with sys.stdout.write, which produces the same bytes as the former print
# statements but is valid syntax on both Python 2 and Python 3.
banner = [
	"/* == Start of generated table == */",
	"/*",
	" * The following table is generated by running:",
	" *",
	" *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt",
	" *",
	" * on files with these headers:",
	" *",
]
for h in headers:
	for l in h:
		banner.append (" * %s" % (l.strip()))
banner.extend ([
	" */",
	"",
	"#ifndef HB_OT_SHAPE_COMPLEX_INDIC_TABLE_HH",
	"#define HB_OT_SHAPE_COMPLEX_INDIC_TABLE_HH",
	"",
])
sys.stdout.write ("\n".join (banner) + "\n")

# Short aliases for the category values, one dict per category kind
# (index 0: syllabic category, index 1: matra category).  The entries listed
# here are hand-picked to be more readable and to avoid duplicates; every
# other value gets an alias derived from its capital letters below.
short = [{
	"Bindu": 'Bi',
	"Visarga": 'Vs',
	"Vowel": 'Vo',
	"Vowel_Dependent": 'M',
	"Other": 'x',
},{
	"Not_Applicable": 'x',
}]

# Aliases already taken, per category kind, used for duplicate detection.
all_shorts = [list (short[i].values ()) for i in range (2)]

what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]
for i in range (2):
	sys.stdout.write ("\n")
	# sorted() works on both Python 2 and 3; dict.keys().sort() only on 2.
	for v in sorted (values[i]):
		if v in short[i]:
			s = short[i][v]
		else:
			# Derived alias: the capital letters of the name, with the
			# connective "_And_" dropped first.
			v_no_and = v.replace ('_And_', '_')
			s = ''.join ([c for c in v_no_and if 'A' <= c <= 'Z'])
			if s in all_shorts[i]:
				raise Exception ("Duplicate short value alias", v, s)
			all_shorts[i].append (s)
			short[i][v] = s
		# Tabs that line up the trailing comment; '//' keeps this an integer
		# on Python 3 as well (a non-positive count yields no padding).
		padding = '\t' * ((48-1 - len (what[i]) - 1 - len (v)) // 8)
		sys.stdout.write ("#define %s_%s\t%s_%s\t%s/* %3d chars; %s */\n" %
			(what_short[i], s, what[i], v.upper (), padding, values[i][v], v))
sys.stdout.write ("\n")
sys.stdout.write ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)\n")
sys.stdout.write ("\n")
sys.stdout.write ("\n")
# Running totals maintained by print_block: number of table slots emitted and
# how many of them hold real (non-default) data.
total = 0
used = 0
def print_block (block, start, end, data):
	"""Write the table entries for codepoints start..end (inclusive) to stdout.

	block is the name shown in the heading comment; data maps a codepoint to
	its category list, and codepoints missing from it are emitted with the
	module-level `defaults`.  Entries are written as `_(S,M),` cells using the
	module-level `short` alias tables, with a row label before every
	codepoint that is a multiple of 8.

	Side effect: adds the number of slots written to the global `total` and
	the number of those found in data to the global `used`.

	Only sys.stdout.write is used (no print statement), so the function is
	valid on both Python 2 and Python 3 and emits the same bytes on each.
	"""
	global total, used
	write = sys.stdout.write

	# Two blank lines, then the heading comment for this block.
	write ("\n\n  /* %s  (%04X..%04X) */\n" % (block, start, end))
	num = 0
	for u in range (start, end+1):
		if u % 8 == 0:
			# Start a new row; the cells follow the label on the same line.
			write ("\n  /* %04X */" % u)
		if u in data:
			num += 1
		d = data.get (u, defaults)
		write ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))

	total += end - start + 1
	used += num

# Emit the body of indic_table[]: walk the known codepoints in ascending
# order and print whole blocks, grouping nearby blocks into contiguous
# segments.  sorted() replaces dict.keys().sort(), which only works on
# Python 2, and sys.stdout.write replaces the print statements.
uu = sorted (data)

last = -1	# last codepoint emitted so far
num = 0
offset = 0	# index in indic_table[] at which the current segment starts
starts = []	# first codepoint of each segment
ends = []	# one past the last codepoint of each segment (exclusive)
sys.stdout.write ("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {\n")
for u in uu:
	if u <= last:
		# Already covered by a block that has been printed.
		continue
	block = data[u][2]
	(start, end) = blocks[block]

	if start != last + 1:
		if start - last <= 33:
			# Small gap: pad it with default entries so the segment stays
			# contiguous.
			print_block ("FILLER", last+1, start-1, data)
			last = start-1
		else:
			# Large gap: close the open segment (if any) and start a new
			# one, recording its offset into the table as a macro.
			if last >= 0:
				ends.append (last + 1)
				offset += ends[-1] - starts[-1]
			sys.stdout.write ("\n")
			sys.stdout.write ("\n")
			sys.stdout.write ("#define indic_offset_0x%04x %d\n" % (start, offset))
			starts.append (start)

	print_block (block, start, end, data)
	last = end
# Close the final segment.
ends.append (last + 1)
offset += ends[-1] - starts[-1]
sys.stdout.write ("\n")
sys.stdout.write ("\n")
sys.stdout.write ("#define indic_offset_total %d\n" % offset)
sys.stdout.write ("\n")
# Percentage of emitted slots that hold real data; total and used are
# accumulated by print_block.
occupancy = used * 100. / total
sys.stdout.write ("}; /* Table occupancy: %d%% */\n" % occupancy)
# Emit get_indic_categories(), then undefine the helper macros and close the
# include guard.  sys.stdout.write is used throughout so the code is valid on
# both Python 2 and Python 3.
sys.stdout.write ("\n")
sys.stdout.write ("static INDIC_TABLE_ELEMENT_TYPE\n")
sys.stdout.write ("get_indic_categories (hb_codepoint_t u)\n")
sys.stdout.write ("{\n")
for (start,end) in zip (starts, ends):
	offset_macro = "indic_offset_0x%04x" % start
	# ends[] holds exclusive bounds (last codepoint + 1), while the generated
	# test is inclusive ("u <= ..."), so emit end - 1.  Printing end itself
	# would let the lookup index one element past the segment, and past the
	# end of indic_table[] for the final segment.
	sys.stdout.write ("  if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];\n" % (start, end - 1, start, offset_macro))
# Codepoints kept outside the table get an explicit comparison each.
for u,d in singles.items ():
	sys.stdout.write ("  if (unlikely (u == 0x%04X)) return _(%s,%s);\n" % (u, short[0][d[0]], short[1][d[1]]))
sys.stdout.write ("  return _(x,x);\n")
sys.stdout.write ("}\n")
sys.stdout.write ("\n")
sys.stdout.write ("#undef _\n")
for i in range (2):
	sys.stdout.write ("\n")
	# sorted() instead of dict.keys().sort(), which only works on Python 2.
	for v in sorted (values[i]):
		sys.stdout.write ("#undef %s_%s\n" % (what_short[i], short[i][v]))
sys.stdout.write ("\n")
sys.stdout.write ("#endif /* HB_OT_SHAPE_COMPLEX_INDIC_TABLE_HH */\n")
sys.stdout.write ("\n")
sys.stdout.write ("/* == End of generated table == */\n")

# Sanity check: the table must stay at least 30% occupied; fail loudly
# otherwise so the sparse layout gets investigated.
min_occupancy = 30
if occupancy < min_occupancy:
	raise Exception ("Table too sparse, please investigate: ", occupancy)