gen-unicode-ranges.py 1.3 KB
Newer Older
1 2 3
# -*- coding: utf-8 -*-

# Generates the code for a sorted unicode range array as used in hb-ot-os2-unicode-ranges.hh
4 5
# Input is a tab separated list of unicode ranges from the otspec
# (https://docs.microsoft.com/en-us/typography/opentype/spec/os2#ulunicoderange1).
6 7 8 9 10 11 12 13

import io
import re
import sys

# Python 2 only: make implicit str/unicode conversions use UTF-8, so that
# printing the (unicode) range names read from the input does not fail when
# stdout has no encoding (e.g. when the output is redirected to a file).
# reload() is needed because site.py removes sys.setdefaultencoding at startup.
# Python 3 has neither the builtin reload() nor sys.setdefaultencoding, and
# already defaults to UTF-8, so the hack is skipped there.
if sys.version_info[0] < 3:
  reload(sys)
  sys.setdefaultencoding('utf-8')

14 15
# Emit the opening of the generated C array; the entries and the closing
# brace are printed further down.
print(u"static Range os2UnicodeRangesSorted[] =\n{")
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52

args = sys.argv[1:]
input_file = args[0]

with io.open(input_file, mode="r", encoding="utf-8") as f:

  all_ranges = [];
  current_bit = 0
  while True:
    line = f.readline().strip()
    if not line:
      break
    fields = re.split(r'\t+', line)
    if len(fields) == 3:
      current_bit = fields[0]
      fields = fields[1:]
    elif len(fields) > 3:
      raise Error("bad input :(.")

    name = fields[0]
    ranges = re.split("-", fields[1])
    if len(ranges) != 2:
      raise Error("bad input :(.")

    v = tuple((int(ranges[0], 16), int(ranges[1], 16), int(current_bit), name))
    all_ranges.append(v)

all_ranges = sorted(all_ranges, key=lambda t: t[0])

for ranges in all_ranges:
  start = ("0x%X" % ranges[0]).rjust(8)
  end = ("0x%X" % ranges[1]).rjust(8)
  bit = ("%s" % ranges[2]).rjust(3)

  print "  {%s, %s, %s}, // %s" % (start, end, bit, ranges[3])

print (u"""};""");