Merge branch 'use'

df6cb844 · Behdad Esfahbod · 2ed6be66 · 786ba458 · df6cb844 · df6cb844
33 changed files
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -14,7 +14,7 @@ DISTCHECK_CONFIGURE_FLAGS = --enable-introspection
 #AM_CXXFLAGS =

 # Convenience targets:
-lib: libharfbuzz.la
+lib: $(BUILT_SOURCES) libharfbuzz.la

 lib_LTLIBRARIES = libharfbuzz.la

@@ -93,6 +93,7 @@ HBSOURCES += \
 	hb-ot-shape.cc \
 	hb-ot-shape-complex-arabic.cc \
 	hb-ot-shape-complex-arabic-fallback.hh \
+	hb-ot-shape-complex-arabic-private.hh \
 	hb-ot-shape-complex-arabic-table.hh \
 	hb-ot-shape-complex-arabic-win1256.hh \
 	hb-ot-shape-complex-default.cc \
@@ -104,10 +105,12 @@ HBSOURCES += \
 	hb-ot-shape-complex-indic-table.cc \
 	hb-ot-shape-complex-myanmar.cc \
 	hb-ot-shape-complex-myanmar-machine.hh \
-	hb-ot-shape-complex-sea.cc \
-	hb-ot-shape-complex-sea-machine.hh \
 	hb-ot-shape-complex-thai.cc \
 	hb-ot-shape-complex-tibetan.cc \
+	hb-ot-shape-complex-use.cc \
+	hb-ot-shape-complex-use-machine.hh \
+	hb-ot-shape-complex-use-private.hh \
+	hb-ot-shape-complex-use-table.cc \
 	hb-ot-shape-complex-private.hh \
 	hb-ot-shape-normalize-private.hh \
 	hb-ot-shape-normalize.cc \
@@ -276,29 +279,34 @@ harfbuzz.def: $(HBHEADERS) $(HBNODISTHEADERS)
 GENERATORS = \
 	gen-arabic-table.py \
 	gen-indic-table.py \
+	gen-use-table.py \
 	$(NULL)
 EXTRA_DIST += $(GENERATORS)

-unicode-tables: arabic-table indic-table
-
-indic-table: gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt
-	$(AM_V_GEN) $(builddir)/$^ > hb-ot-shape-complex-indic-table.cc \
-	|| ($(RM) hb-ot-shape-complex-indic-table.cc; false)
+unicode-tables: arabic-table indic-table use-table

 arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
 	$(AM_V_GEN) $(builddir)/$^ > hb-ot-shape-complex-arabic-table.hh \
 	|| ($(RM) hb-ot-shape-complex-arabic-table.hh; false)

+indic-table: gen-indic-table.py IndicSyllabicCategory-7.0.0.txt IndicMatraCategory-7.0.0.txt Blocks.txt
+	$(AM_V_GEN) $(builddir)/$^ > hb-ot-shape-complex-indic-table.cc \
+	|| ($(RM) hb-ot-shape-complex-indic-table.cc; false)
+
+use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt
+	$(AM_V_GEN) $(builddir)/$^ > hb-ot-shape-complex-use-table.cc \
+	|| ($(RM) hb-ot-shape-complex-use-table.cc; false)
+
 built-sources: $(BUILT_SOURCES)

-.PHONY: unicode-tables arabic-table indic-table built-sources
+.PHONY: unicode-tables arabic-table indic-table use-table built-sources

 RAGEL_GENERATED = \
 	$(srcdir)/hb-buffer-deserialize-json.hh \
 	$(srcdir)/hb-buffer-deserialize-text.hh \
 	$(srcdir)/hb-ot-shape-complex-indic-machine.hh \
 	$(srcdir)/hb-ot-shape-complex-myanmar-machine.hh \
-	$(srcdir)/hb-ot-shape-complex-sea-machine.hh \
+	$(srcdir)/hb-ot-shape-complex-use-machine.hh \
 	$(NULL)
 BUILT_SOURCES += $(RAGEL_GENERATED)
 EXTRA_DIST += \
@@ -306,7 +314,7 @@ EXTRA_DIST += \
 	hb-buffer-deserialize-text.rl \
 	hb-ot-shape-complex-indic-machine.rl \
 	hb-ot-shape-complex-myanmar-machine.rl \
-	hb-ot-shape-complex-sea-machine.rl \
+	hb-ot-shape-complex-use-machine.rl \
 	$(NULL)
 MAINTAINERCLEANFILES += $(RAGEL_GENERATED)
 $(srcdir)/%.hh: $(srcdir)/%.rl

--- a/src/gen-use-table.py
+++ b/src/gen-use-table.py
+#!/usr/bin/python
+
+import sys
+
+if len (sys.argv) != 5:
+	print >>sys.stderr, "usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt"
+	sys.exit (1)
+
+BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]
+
+files = [file (x) for x in sys.argv[1:]]
+
+headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 2]
+headers.append (["UnicodeData.txt does not have a header."])
+
+data = [{} for f in files]
+values = [{} for f in files]
+for i, f in enumerate (files):
+	for line in f:
+
+		j = line.find ('#')
+		if j >= 0:
+			line = line[:j]
+
+		fields = [x.strip () for x in line.split (';')]
+		if len (fields) == 1:
+			continue
+
+		uu = fields[0].split ('..')
+		start = int (uu[0], 16)
+		if len (uu) == 1:
+			end = start
+		else:
+			end = int (uu[1], 16)
+
+		t = fields[1 if i != 2 else 2]
+
+		for u in range (start, end + 1):
+			data[i][u] = t
+		values[i][t] = values[i].get (t, 0) + end - start + 1
+
+defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')
+
+# TODO Characters that are not in Unicode Indic files, but used in USE
+data[0][0x034F] = defaults[0]
+data[0][0x2060] = defaults[0]
+for u in range (0xFE00, 0xFE0F + 1):
+	data[0][u] = defaults[0]
+
+# Merge data into one dict:
+for i,v in enumerate (defaults):
+	values[i][v] = values[i].get (v, 0) + 1
+combined = {}
+for i,d in enumerate (data):
+	for u,v in d.items ():
+		if i >= 2 and not u in combined:
+			continue
+		if not u in combined:
+			combined[u] = list (defaults)
+		combined[u][i] = v
+combined = {k:v for k,v in combined.items() if v[3] not in BLACKLISTED_BLOCKS}
+data = combined
+del combined
+num = len (data)
+
+
+property_names = [
+	# General_Category
+	'Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', 'Mc',
+	'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po',
+	'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',
+	# Indic_Syllabic_Category
+	'Other',
+	'Bindu',
+	'Visarga',
+	'Avagraha',
+	'Nukta',
+	'Virama',
+	'Pure_Killer',
+	'Invisible_Stacker',
+	'Vowel_Independent',
+	'Vowel_Dependent',
+	'Vowel',
+	'Consonant_Placeholder',
+	'Consonant',
+	'Consonant_Dead',
+	'Consonant_With_Stacker',
+	'Consonant_Prefixed',
+	'Consonant_Preceding_Repha',
+	'Consonant_Succeeding_Repha',
+	'Consonant_Subjoined',
+	'Consonant_Medial',
+	'Consonant_Final',
+	'Consonant_Head_Letter',
+	'Modifying_Letter',
+	'Tone_Letter',
+	'Tone_Mark',
+	'Gemination_Mark',
+	'Cantillation_Mark',
+	'Register_Shifter',
+	'Syllable_Modifier',
+	'Consonant_Killer',
+	'Non_Joiner',
+	'Joiner',
+	'Number_Joiner',
+	'Number',
+	'Brahmi_Joining_Number',
+	# Indic_Positional_Category
+	'Not_Applicable',
+	'Right',
+	'Left',
+	'Visual_Order_Left',
+	'Left_And_Right',
+	'Top',
+	'Bottom',
+	'Top_And_Bottom',
+	'Top_And_Right',
+	'Top_And_Left',
+	'Top_And_Left_And_Right',
+	'Bottom_And_Right',
+	'Top_And_Bottom_And_Right',
+	'Overstruck',
+]
+
+class PropertyValue(object):
+	# Wraps a Unicode property-value name so that it can be bound to a bare
+	# identifier (e.g. Lo, Virama) and compared against either another
+	# PropertyValue or the raw string read from the data files.
+	def __init__(self, name_):
+		self.name = name_
+	def __str__(self):
+		return self.name
+	def __eq__(self, other):
+		# Accept plain strings too, since the parsed data holds raw names.
+		return self.name == (other if isinstance(other, basestring) else other.name)
+	def __ne__(self, other):
+		return not (self == other)
+	def __hash__(self):
+		# Keep hashing consistent with __eq__: objects that compare equal
+		# (including to their plain-string name) must hash equal, otherwise
+		# dict/set membership tests silently miss.
+		return hash(self.name)
+
+property_values = {}
+
+# Create one PropertyValue per name and expose each as a module-level
+# identifier so the is_* predicates below can refer to them directly.
+for name in property_names:
+	value = PropertyValue(name)
+	# Both dicts are keyed by the name string, so test the string itself;
+	# this catches duplicate names and clashes with existing globals
+	# regardless of how PropertyValue objects hash.
+	assert name not in property_values
+	assert name not in globals()
+	property_values[name] = value
+globals().update(property_values)
+
+
+def is_BASE(U, UISC, UGC):
+	return (UISC in [Number, Consonant, Consonant_Head_Letter,
+			#SPEC-OUTDATED Consonant_Placeholder,
+			Tone_Letter] or
+		(UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial,
+					Consonant_Subjoined, Vowel, Vowel_Dependent]))
+def is_BASE_VOWEL(U, UISC, UGC):
+	return UISC == Vowel_Independent
+def is_BASE_IND(U, UISC, UGC):
+	#SPEC-BROKEN return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
+	return (UISC in [Consonant_Dead, Modifying_Letter] or
+		(UGC == Po and not is_BASE_OTHER(U, UISC, UGC))) # for 104E
+def is_BASE_NUM(U, UISC, UGC):
+	return UISC == Brahmi_Joining_Number
+def is_BASE_OTHER(U, UISC, UGC):
+	if UISC == Consonant_Placeholder: return True #SPEC-OUTDATED
+	return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC,
+		     0x25FB, 0x25FC, 0x25FD, 0x25FE]
+def is_CGJ(U, UISC, UGC):
+	return U == 0x034F
+def is_CONS_FINAL(U, UISC, UGC):
+	return ((UISC == Consonant_Final and UGC != Lo) or
+		UISC == Consonant_Succeeding_Repha)
+def is_CONS_FINAL_MOD(U, UISC, UGC):
+	#SPEC-OUTDATED return  UISC in [Consonant_Final_Modifier, Syllable_Modifier]
+	return  UISC == Syllable_Modifier
+def is_CONS_MED(U, UISC, UGC):
+	return UISC == Consonant_Medial and UGC != Lo
+def is_CONS_MOD(U, UISC, UGC):
+	return UISC in [Nukta, Gemination_Mark, Consonant_Killer]
+def is_CONS_SUB(U, UISC, UGC):
+	#SPEC-OUTDATED return UISC == Consonant_Subjoined
+	return UISC == Consonant_Subjoined and UGC != Lo
+def is_HALANT(U, UISC, UGC):
+	return UISC in [Virama, Invisible_Stacker]
+def is_HALANT_NUM(U, UISC, UGC):
+	return UISC == Number_Joiner
+def is_ZWNJ(U, UISC, UGC):
+	return UISC == Non_Joiner
+def is_ZWJ(U, UISC, UGC):
+	return UISC == Joiner
+def is_Word_Joiner(U, UISC, UGC):
+	return U == 0x2060
+def is_OTHER(U, UISC, UGC):
+	#SPEC-OUTDATED return UGC == Zs # or any other SCRIPT_COMMON characters
+	return (UISC == Other
+		and not is_SYM_MOD(U, UISC, UGC)
+		and not is_CGJ(U, UISC, UGC)
+		and not is_Word_Joiner(U, UISC, UGC)
+		and not is_VARIATION_SELECTOR(U, UISC, UGC)
+	)
+def is_Reserved(U, UISC, UGC):
+	return UGC == 'Cn'
+def is_REPHA(U, UISC, UGC):
+	#return UISC == Consonant_Preceding_Repha
+	#SPEC-OUTDATED hack to categorize Consonant_With_Stacker and Consonant_Prefixed
+	return UISC in [Consonant_Preceding_Repha, Consonant_With_Stacker, Consonant_Prefixed]
+def is_SYM(U, UISC, UGC):
+	if U == 0x25CC: return False #SPEC-OUTDATED
+	#SPEC-OUTDATED return UGC in [So, Sc] or UISC == Symbol_Letter
+	return UGC in [So, Sc]
+def is_SYM_MOD(U, UISC, UGC):
+	return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73]
+def is_VARIATION_SELECTOR(U, UISC, UGC):
+	return 0xFE00 <= U <= 0xFE0F
+def is_VOWEL(U, UISC, UGC):
+	return (UISC == Pure_Killer or
+		(UGC != Lo and UISC in [Vowel, Vowel_Dependent]))
+def is_VOWEL_MOD(U, UISC, UGC):
+	return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
+		(UGC != Lo and UISC == Bindu))
+
+use_mapping = {
+	'B':	is_BASE,
+	'IV':	is_BASE_VOWEL,
+	'IND':	is_BASE_IND,
+	'N':	is_BASE_NUM,
+	'GB':	is_BASE_OTHER,
+	'CGJ':	is_CGJ,
+	'F':	is_CONS_FINAL,
+	'FM':	is_CONS_FINAL_MOD,
+	'M':	is_CONS_MED,
+	'CM':	is_CONS_MOD,
+	'SUB':	is_CONS_SUB,
+	'H':	is_HALANT,
+	'HN':	is_HALANT_NUM,
+	'ZWNJ':	is_ZWNJ,
+	'ZWJ':	is_ZWJ,
+	'WJ':	is_Word_Joiner,
+	'O':	is_OTHER,
+	'Rsv':	is_Reserved,
+	'R':	is_REPHA,
+	'S':	is_SYM,
+	'SM':	is_SYM_MOD,
+	'VS':	is_VARIATION_SELECTOR,
+	'V':	is_VOWEL,
+	'VM':	is_VOWEL_MOD,
+}
+
+use_positions = {
+	'F': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+		'Pst': [Right],
+	},
+	'M': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+		'Pst': [Right],
+		'Pre': [Left],
+	},
+	'CM': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+	},
+	'V': {
+		'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
+		'Blw': [Bottom, Overstruck, Bottom_And_Right],
+		'Pst': [Right],
+		'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+	},
+	'VM': {
+		'Abv': [Top],
+		'Blw': [Bottom, Overstruck],
+		'Pst': [Right],
+		'Pre': [Left],
+	},
+	'SM': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+	},
+	'H': None,
+	'B': None,
+	'FM': None,
+	'SUB': None,
+}
+
+# Classify every code point in `data` into a USE category.
+# `data` maps code point -> (UISC, UIPC, UGC, UBlock); the result maps
+# code point -> (USE category name, UBlock).  Asserts that exactly one
+# use_mapping predicate matches each code point and, for categories that
+# have a position table in use_positions, that exactly one position matches.
+def map_to_use(data):
+	out = {}
+	items = use_mapping.items()
+	for U,(UISC,UIPC,UGC,UBlock) in data.items():
+
+		# Resolve Indic_Syllabic_Category
+
+		# TODO: These don't have UISC assigned in Unicode 8.0, but
+		# have UIPC
+		if U == 0x17DD: UISC = Vowel_Dependent
+		if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
+
+		# TODO: U+1CED should only be allowed after some of
+		# the nasalization marks, maybe only for U+1CE9..U+1CF1.
+		if U == 0x1CED: UISC = Tone_Mark
+
+		# Evaluate every predicate; exactly one must claim this code point.
+		evals = [(k, v(U,UISC,UGC)) for k,v in items]
+		values = [k for k,v in evals if v]
+		assert len(values) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, values)
+		USE = values[0]
+
+		# Resolve Indic_Positional_Category
+
+		# TODO: Not in Unicode 8.0 yet, but in spec.
+		if U == 0x1B6C: UIPC = Bottom
+
+		# TODO: These should die, but have UIPC in Unicode 8.0
+		if U in [0x953, 0x954]: UIPC = Not_Applicable
+
+		# TODO: In USE's override list but not in Unicode 8.0
+		if U == 0x103C: UIPC = Left
+
+		# TODO: These are not in USE's override list that we have, nor are they in Unicode 8.0
+		if 0xA926 <= U <= 0xA92A: UIPC = Top
+		if U == 0x111CA: UIPC = Bottom
+		if U == 0x11300: UIPC = Top
+		if U == 0x1133C: UIPC = Bottom
+		if U == 0x1171E: UIPC = Left # Correct?!
+		if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
+		if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
+
+		# A real position is only acceptable for categories listed in use_positions.
+		assert (UIPC in [Not_Applicable, Visual_Order_Left] or
+			USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)
+
+		# Categories mapped to None (or absent from use_positions) get no
+		# positional suffix; the others get exactly one of their suffixes.
+		pos_mapping = use_positions.get(USE, None)
+		if pos_mapping:
+			values = [k for k,v in pos_mapping.items() if v and UIPC in v]
+			assert len(values) == 1, "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, values)
+			USE = USE + values[0]
+
+		out[U] = (USE, UBlock)
+	return out
+
+defaults = ('O', 'No_Block')
+data = map_to_use(data)
+
+# Remove the outliers
+singles = {}
+for u in [0x034F, 0x25CC, 0x1107F]:
+	singles[u] = data[u]
+	del data[u]
+
+print "/* == Start of generated table == */"
+print "/*"
+print " * The following table is generated by running:"
+print " *"
+print " *   ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt"
+print " *"
+print " * on files with these headers:"
+print " *"
+for h in headers:
+	for l in h:
+		print " * %s" % (l.strip())
+print " */"
+print
+print '#include "hb-ot-shape-complex-use-private.hh"'
+print
+
+total = 0
+used = 0
+last_block = None
+# Print the table entries for code points start..end (inclusive), starting a
+# new row labelled with the code point at every multiple of 16.  When `block`
+# is set and differs from the previously printed block, a "/* block */"
+# comment is emitted first.  Code points missing from `data` are printed with
+# the global `defaults` category.  Updates the global `total`/`used` counters
+# from which the occupancy figure is later computed.
+def print_block (block, start, end, data):
+	global total, used, last_block
+	if block and block != last_block:
+		print
+		print
+		print "  /* %s */" % block
+		if start % 16:
+			# Block starts mid-row: pad so its first entry lines up.
+			print ' ' * (20 + (start % 16 * 6)),
+	num = 0
+	# Ranges must be aligned to 8 entries at both ends.
+	assert start % 8 == 0
+	assert (end+1) % 8 == 0
+	for u in range (start, end+1):
+		if u % 16 == 0:
+			print
+			print "  /* %04X */" % u,
+		if u in data:
+			num += 1
+		d = data.get (u, defaults)
+		sys.stdout.write ("%6s," % d[0])
+
+	total += end - start + 1
+	used += num
+	if block:
+		last_block = block
+
+uu = data.keys ()
+uu.sort ()
+
+last = -100000
+num = 0
+offset = 0
+starts = []
+ends = []
+for k,v in sorted(use_mapping.items()):
+	if k in use_positions and use_positions[k]: continue
+	print "#define %s	USE_%s	/* %s */" % (k, k, v.__name__[3:])
+for k,v in sorted(use_positions.items()):
+	if not v: continue
+	for suf in v.keys():
+		tag = k + suf
+		print "#define %s	USE_%s" % (tag, tag)
+print ""
+print "static const USE_TABLE_ELEMENT_TYPE use_table[] = {"
+for u in uu:
+	if u <= last:
+		continue
+	block = data[u][1]
+
+	start = u//8*8
+	end = start+1
+	while end in uu and block == data[end][1]:
+		end += 1
+	end = (end-1)//8*8 + 7
+
+	if start != last + 1:
+		if start - last <= 1+16*3:
+			print_block (None, last+1, start-1, data)
+			last = start-1
+		else:
+			if last >= 0:
+				ends.append (last + 1)
+				offset += ends[-1] - starts[-1]
+			print
+			print
+			print "#define use_offset_0x%04xu %d" % (start, offset)
+			starts.append (start)
+
+	print_block (block, start, end, data)
+	last = end
+ends.append (last + 1)
+offset += ends[-1] - starts[-1]
+print
+print
+occupancy = used * 100. / total
+page_bits = 12
+print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)
+print
+print "USE_TABLE_ELEMENT_TYPE"
+print "hb_use_get_categories (hb_codepoint_t u)"
+print "{"
+print "  switch (u >> %d)" % page_bits
+print "  {"
+pages = set([u>>page_bits for u in starts+ends+singles.keys()])
+for p in sorted(pages):
+	print "    case 0x%0Xu:" % p
+	for (start,end) in zip (starts, ends):
+		if p not in [start>>page_bits, end>>page_bits]: continue
+		offset = "use_offset_0x%04xu" % start
+		print "      if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
+	for u,d in singles.items ():
+		if p != u>>page_bits: continue
+		print "      if (unlikely (u == 0x%04Xu)) return %s;" % (u, d[0])
+	print "      break;"
+	print ""
+print "    default:"
+print "      break;"
+print "  }"
+print "  return USE_O;"
+print "}"
+print
+for k in sorted(use_mapping.keys()):
+	if k in use_positions and use_positions[k]: continue
+	print "#undef %s" % k
+for k,v in sorted(use_positions.items()):
+	if not v: continue
+	for suf in v.keys():
+		tag = k + suf
+		print "#undef %s" % tag
+print
+print "/* == End of generated table == */"
+
+# Maintain at least 50% occupancy in the table.
+if occupancy < 50:
+	raise Exception ("Table too sparse, please investigate: ", occupancy)
--- a/src/hb-ot-layout-private.hh
+++ b/src/hb-ot-layout-private.hh
@@ -188,6 +188,37 @@ _hb_ot_layout_destroy (hb_ot_layout_t *layout);
 #define lig_props()		var1.u8[2] /* GSUB/GPOS ligature tracking */
 #define syllable()		var1.u8[3] /* GSUB/GPOS shaping boundaries */

+
+/* loop over syllables */
+
+/* Iterates over maximal runs of consecutive glyphs sharing one syllable()
+ * value, binding each run to the half-open range [start, end).  The step
+ * expression only calls _next_syllable() while start is still inside the
+ * buffer, so info[buffer->len] is never read after the last run. */
+#define foreach_syllable(buffer, start, end) \
+  for (unsigned int \
+       _count = buffer->len, \
+       start = 0, end = _count ? _next_syllable (buffer, 0) : 0; \
+       start < _count; \
+       start = end, end = start < _count ? _next_syllable (buffer, start) : start)
+
+/* Returns the index one past the last glyph of the syllable that begins at
+ * start.  Precondition: start < buffer->len (info[start] is read
+ * unconditionally). */
+static inline unsigned int
+_next_syllable (hb_buffer_t *buffer, unsigned int start)
+{
+  hb_glyph_info_t *info = buffer->info;
+  unsigned int count = buffer->len;
+
+  unsigned int syllable = info[start].syllable();
+  while (++start < count && syllable == info[start].syllable())
+    ;
+
+  return start;
+}
+
+
 /* unicode_props */

 enum {
@@ -417,6 +441,14 @@ _hb_glyph_info_clear_ligated_and_multiplied (hb_glyph_info_t *info)
 			   HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED);
 }

+static inline void
+_hb_glyph_info_clear_substituted_and_ligated_and_multiplied (hb_glyph_info_t *info)
+{
+  info->glyph_props() &= ~(HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED |
+			   HB_OT_LAYOUT_GLYPH_PROPS_LIGATED |
+			   HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED);
+}
+

 /* Allocation / deallocation. */


--- a/src/hb-ot-shape-complex-sea-machine.rl
+++ b/src/hb-ot-shape-complex-sea-machine.rl
 /*
- * Copyright © 2011,2012,2013  Google, Inc.
+ * Copyright © 2015  Mozilla Foundation.
+ * Copyright © 2015  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
@@ -21,82 +22,29 @@
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
+ * Mozilla Author(s): Jonathan Kew
 * Google Author(s): Behdad Esfahbod
 */

-#ifndef HB_OT_SHAPE_COMPLEX_SEA_MACHINE_HH
-#define HB_OT_SHAPE_COMPLEX_SEA_MACHINE_HH
+#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_PRIVATE_HH
+#define HB_OT_SHAPE_COMPLEX_ARABIC_PRIVATE_HH

 #include "hb-private.hh"

-%%{
-  machine sea_syllable_machine;
-  alphtype unsigned char;
-  write data;
-}%%
+#include "hb-ot-shape-complex-private.hh"

-%%{

-# Same order as enum sea_category_t.  Not sure how to avoid duplication.
-C    = 1;
-GB   = 12; # Generic Base
-H    = 4;  # Halant
-IV   = 2;  # Independent Vowel
-MR   = 22; # Medial Ra
-CM   = 17; # Consonant Medial
-VAbv = 26;
-VBlw = 27;
-VPre = 28;
-VPst = 29;
-T    = 3;  # Tone Marks
-A    = 10; # Anusvara
+struct arabic_shape_plan_t;

-syllable_tail = (VPre|VAbv|VBlw|VPst|H.C|CM|MR|T|A)*;
+HB_INTERNAL void *
+data_create_arabic (const hb_ot_shape_plan_t *plan);

-consonant_syllable =	(C|IV|GB) syllable_tail;
-broken_cluster =	syllable_tail;
-other =			any;
+HB_INTERNAL void
+data_destroy_arabic (void *data);

-main := |*
-	consonant_syllable	=> { found_syllable (consonant_syllable); };
-	broken_cluster		=> { found_syllable (broken_cluster); };
-	other			=> { found_syllable (non_sea_cluster); };
-*|;
+HB_INTERNAL void
+setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan,
+			 hb_buffer_t               *buffer,
+			 hb_script_t                script);

-
-}%%
-
-#define found_syllable(syllable_type) \
-  HB_STMT_START { \
-    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \
-    for (unsigned int i = last; i < p+1; i++) \
-      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
-    last = p+1; \
-    syllable_serial++; \
-    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
-  } HB_STMT_END
-
-static void
-find_syllables (hb_buffer_t *buffer)
-{
-  unsigned int p, pe, eof, ts HB_UNUSED, te HB_UNUSED, act HB_UNUSED;
-  int cs;
-  hb_glyph_info_t *info = buffer->info;
-  %%{
-    write init;
-    getkey info[p].sea_category();
-  }%%
-
-  p = 0;
-  pe = eof = buffer->len;
-
-  unsigned int last = 0;
-  unsigned int syllable_serial = 1;
-  %%{
-    write exec;
-  }%%
-}
-
-#undef found_syllable
-
-#endif /* HB_OT_SHAPE_COMPLEX_SEA_MACHINE_HH */
+#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_PRIVATE_HH */
--- a/src/hb-ot-shape-complex-arabic-win1256.hh
+++ b/src/hb-ot-shape-complex-arabic-win1256.hh
@@ -142,7 +142,7 @@
 		OT_UARRAY(Name##Substitute, OT_LIST(ToGlyphs)) \
 	) \
 	OT_COVERAGE1(Name##Coverage, OT_LIST(FromGlyphs)) \
-	/* ASSERT_STATIC_EXPR len(FromGlyphs) == len(ToGlyphs) */
+	/* ASSERT_STATIC_EXPR_ZERO (len(FromGlyphs) == len(ToGlyphs)) */

 #define OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(Name, FirstGlyphs, LigatureSetOffsets) \
 	OT_SUBLOOKUP(Name, 1, \
@@ -151,7 +151,7 @@
 		OT_UARRAY(Name##LigatureSetOffsetsArray, OT_LIST(LigatureSetOffsets)) \
 	) \
 	OT_COVERAGE1(Name##Coverage, OT_LIST(FirstGlyphs)) \
-	/* ASSERT_STATIC_EXPR len(FirstGlyphs) == len(LigatureSetOffsets) */
+	/* ASSERT_STATIC_EXPR_ZERO (len(FirstGlyphs) == len(LigatureSetOffsets)) */

 #define OT_LIGATURE_SET(Name, LigatureSetOffsets) \
 	OT_UARRAY(Name, OT_LIST(LigatureSetOffsets))

--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -24,7 +24,7 @@
 * Google Author(s): Behdad Esfahbod
 */

-#include "hb-ot-shape-complex-private.hh"
+#include "hb-ot-shape-complex-arabic-private.hh"
 #include "hb-ot-shape-private.hh"


@@ -32,10 +32,14 @@
 #define arabic_shaping_action() complex_var_u8_0() /* arabic shaping action */


+/*
+ * Joining types:
+ */
+
 /*
 * Bits used in the joining tables
 */
-enum {
+enum hb_arabic_joining_type_t {
  JOINING_TYPE_U		= 0,
  JOINING_TYPE_L		= 1,
  JOINING_TYPE_R		= 2,
@@ -49,10 +53,6 @@ enum {
  JOINING_TYPE_X = 8  /* means: use general-category to choose between U or T. */
 };

-/*
- * Joining types:
- */
-
 #include "hb-ot-shape-complex-arabic-table.hh"

 static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat)
@@ -61,7 +61,7 @@ static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_categ
  if (likely (j_type != JOINING_TYPE_X))
    return j_type;

-  return (FLAG(gen_cat) &
+  return (FLAG_SAFE(gen_cat) &
 	  (FLAG(HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
 	   FLAG(HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
 	   FLAG(HB_UNICODE_GENERAL_CATEGORY_FORMAT))
@@ -212,7 +212,7 @@ struct arabic_shape_plan_t
  arabic_fallback_plan_t *fallback_plan;
 };

-static void *
+void *
 data_create_arabic (const hb_ot_shape_plan_t *plan)
 {
  arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) calloc (1, sizeof (arabic_shape_plan_t));
@@ -230,7 +230,7 @@ data_create_arabic (const hb_ot_shape_plan_t *plan)
  return arabic_plan;
 }

-static void
+void
 data_destroy_arabic (void *data)
 {
  arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) data;
@@ -305,17 +305,15 @@ mongolian_variation_selectors (hb_buffer_t *buffer)
      info[i].arabic_shaping_action() = info[i - 1].arabic_shaping_action();
 }

-static void
-setup_masks_arabic (const hb_ot_shape_plan_t *plan,
-		    hb_buffer_t              *buffer,
-		    hb_font_t                *font HB_UNUSED)
+void
+setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan,
+			 hb_buffer_t               *buffer,
+			 hb_script_t                script)
 {
  HB_BUFFER_ALLOCATE_VAR (buffer, arabic_shaping_action);

-  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
-
  arabic_joining (buffer);
-  if (plan->props.script == HB_SCRIPT_MONGOLIAN)
+  if (script == HB_SCRIPT_MONGOLIAN)
    mongolian_variation_selectors (buffer);

  unsigned int count = buffer->len;
@@ -326,6 +324,15 @@ setup_masks_arabic (const hb_ot_shape_plan_t *plan,
  HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
 }

+static void
+setup_masks_arabic (const hb_ot_shape_plan_t *plan,
+		    hb_buffer_t              *buffer,
+		    hb_font_t                *font HB_UNUSED)
+{
+  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
+  setup_masks_arabic_plan (arabic_plan, buffer, plan->props.script);
+}
+

 static void
 nuke_joiners (const hb_ot_shape_plan_t *plan HB_UNUSED,

--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -161,8 +161,6 @@ enum indic_matra_category_t {
  INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT		= POS_PRE_M
 };

-/* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation
- * because gcc fails to optimize the latter and fills the table in at runtime. */
 #define INDIC_COMBINE_CATEGORIES(S,M) \
  (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || \
 			    ( \

--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -142,7 +142,7 @@ is_one_of (const hb_glyph_info_t &info, unsigned int flags)
 {
  /* If it ligated, all bets are off. */
  if (_hb_glyph_info_ligated (&info)) return false;
-  return !!(FLAG (info.indic_category()) & flags);
+  return !!(FLAG_SAFE (info.indic_category()) & flags);
 }

 static inline bool
@@ -237,7 +237,7 @@ set_indic_properties (hb_glyph_info_t &info)
   * Re-assign position.
   */

-  if ((FLAG (cat) & CONSONANT_FLAGS))
+  if ((FLAG_SAFE (cat) & CONSONANT_FLAGS))
  {
    pos = POS_BASE_C;
    if (is_ra (u))
@@ -247,7 +247,7 @@ set_indic_properties (hb_glyph_info_t &info)
  {
    pos = matra_position (u, pos);
  }
-  else if ((FLAG (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Symbol))))
+  else if ((FLAG_SAFE (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Symbol))))
  {
    pos = POS_SMVD;
  }
@@ -963,7 +963,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
    indic_position_t last_pos = POS_START;
    for (unsigned int i = start; i < end; i++)
    {
-      if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | HALANT_OR_COENG_FLAGS)))
+      if ((FLAG_SAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | HALANT_OR_COENG_FLAGS)))
      {
 	info[i].indic_position() = last_pos;
 	if (unlikely (info[i].indic_category() == OT_H &&
@@ -1161,17 +1161,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
    }
 }

-
-static void
-initial_reordering_vowel_syllable (const hb_ot_shape_plan_t *plan,
-				   hb_face_t *face,
-				   hb_buffer_t *buffer,
-				   unsigned int start, unsigned int end)
-{
-  /* We made the vowels look like consonants.  So let's call the consonant logic! */
-  initial_reordering_consonant_syllable (plan, face, buffer, start, end);
-}
-
 static void
 initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
 				       hb_face_t *face,
@@ -1193,37 +1182,6 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
  initial_reordering_consonant_syllable (plan, face, buffer, start, end);
 }

-static void
-initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,
-				   hb_face_t *face,
-				   hb_buffer_t *buffer,
-				   unsigned int start, unsigned int end)
-{
-  /* We already inserted dotted-circles, so just call the standalone_cluster. */
-  initial_reordering_standalone_cluster (plan, face, buffer, start, end);
-}
-
-static void
-initial_reordering_symbol_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
-				   hb_face_t *face HB_UNUSED,
-				   hb_buffer_t *buffer HB_UNUSED,
-				   unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
-{
-  /* Nothing to do right now.  If we ever switch to using the output
-   * buffer in the reordering process, we'd need to next_glyph() here. */
-}
-
-static void
-initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
-				      hb_face_t *face HB_UNUSED,
-				      hb_buffer_t *buffer HB_UNUSED,
-				      unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
-{
-  /* Nothing to do right now.  If we ever switch to using the output
-   * buffer in the reordering process, we'd need to next_glyph() here. */
-}
-
-
 static void
 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 			     hb_face_t *face,
@@ -1231,13 +1189,21 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 			     unsigned int start, unsigned int end)
 {
  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
-  switch (syllable_type) {
-  case consonant_syllable:	initial_reordering_consonant_syllable (plan, face, buffer, start, end); return;
-  case vowel_syllable:		initial_reordering_vowel_syllable     (plan, face, buffer, start, end); return;
-  case standalone_cluster:	initial_reordering_standalone_cluster (plan, face, buffer, start, end); return;
-  case symbol_cluster:		initial_reordering_symbol_cluster     (plan, face, buffer, start, end); return;
-  case broken_cluster:		initial_reordering_broken_cluster     (plan, face, buffer, start, end); return;
-  case non_indic_cluster:	initial_reordering_non_indic_cluster  (plan, face, buffer, start, end); return;
+  switch (syllable_type)
+  {
+    case vowel_syllable: /* We made the vowels look like consonants.  So let's call the consonant logic! */
+    case consonant_syllable:
+     initial_reordering_consonant_syllable (plan, face, buffer, start, end);
+     break;
+
+    case broken_cluster: /* We already inserted dotted-circles, so just call the standalone_cluster. */
+    case standalone_cluster:
+     initial_reordering_standalone_cluster (plan, face, buffer, start, end);
+     break;
+
+    case symbol_cluster:
+    case non_indic_cluster:
+      break;
  }
 }

@@ -1310,18 +1276,8 @@ initial_reordering (const hb_ot_shape_plan_t *plan,
  update_consonant_positions (plan, font, buffer);
  insert_dotted_circles (plan, font, buffer);

-  hb_glyph_info_t *info = buffer->info;
-  unsigned int count = buffer->len;
-  if (unlikely (!count)) return;
-  unsigned int last = 0;
-  unsigned int last_syllable = info[0].syllable();
-  for (unsigned int i = 1; i < count; i++)
-    if (last_syllable != info[i].syllable()) {
-      initial_reordering_syllable (plan, font->face, buffer, last, i);
-      last = i;
-      last_syllable = info[last].syllable();
-    }
-  initial_reordering_syllable (plan, font->face, buffer, last, count);
+  foreach_syllable (buffer, start, end)
+    initial_reordering_syllable (plan, font->face, buffer, start, end);
 }

 static void
@@ -1550,7 +1506,7 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,
    {
      new_reph_pos = base;
      while (new_reph_pos < end &&
-	     !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
+	     !( FLAG_SAFE (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
 	new_reph_pos++;
      if (new_reph_pos < end)
        goto reph_move;
@@ -1701,7 +1657,7 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,
  /* Apply 'init' to the Left Matra if it's a word start. */
  if (info[start].indic_position () == POS_PRE_M &&
      (!start ||
-       !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) &
+       !(FLAG_SAFE (_hb_glyph_info_get_general_category (&info[start - 1])) &
 	 FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
    info[start].mask |= indic_plan->mask_array[INIT];

@@ -1737,16 +1693,8 @@ final_reordering (const hb_ot_shape_plan_t *plan,
  unsigned int count = buffer->len;
  if (unlikely (!count)) return;

-  hb_glyph_info_t *info = buffer->info;
-  unsigned int last = 0;
-  unsigned int last_syllable = info[0].syllable();
-  for (unsigned int i = 1; i < count; i++)
-    if (last_syllable != info[i].syllable()) {
-      final_reordering_syllable (plan, buffer, last, i);
-      last = i;
-      last_syllable = info[last].syllable();
-    }
-  final_reordering_syllable (plan, buffer, last, count);
+  foreach_syllable (buffer, start, end)
+    final_reordering_syllable (plan, buffer, start, end);

  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);

--- a/src/hb-ot-shape-complex-myanmar.cc
+++ b/src/hb-ot-shape-complex-myanmar.cc
@@ -154,7 +154,7 @@ is_one_of (const hb_glyph_info_t &info, unsigned int flags)
 {
  /* If it ligated, all bets are off. */
  if (_hb_glyph_info_ligated (&info)) return false;
-  return !!(FLAG (info.myanmar_category()) & flags);
+  return !!(FLAG_SAFE (info.myanmar_category()) & flags);
 }

 static inline bool
@@ -304,9 +304,7 @@ compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm */

 static void
-initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
-				       hb_face_t *face,
-				       hb_buffer_t *buffer,
+initial_reordering_consonant_syllable (hb_buffer_t *buffer,
 				       unsigned int start, unsigned int end)
 {
  hb_glyph_info_t *info = buffer->info;
@@ -398,37 +396,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
  hb_bubble_sort (info + start, end - start, compare_myanmar_order);
 }

-static void
-initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,
-				   hb_face_t *face,
-				   hb_buffer_t *buffer,
-				   unsigned int start, unsigned int end)
-{
-  /* We already inserted dotted-circles, so just call the consonant_syllable. */
-  initial_reordering_consonant_syllable (plan, face, buffer, start, end);
-}
-
-static void
-initial_reordering_punctuation_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
-					hb_face_t *face HB_UNUSED,
-					hb_buffer_t *buffer HB_UNUSED,
-					unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
-{
-  /* Nothing to do right now.  If we ever switch to using the output
-   * buffer in the reordering process, we'd need to next_glyph() here. */
-}
-
-static void
-initial_reordering_non_myanmar_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
-					hb_face_t *face HB_UNUSED,
-					hb_buffer_t *buffer HB_UNUSED,
-					unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
-{
-  /* Nothing to do right now.  If we ever switch to using the output
-   * buffer in the reordering process, we'd need to next_glyph() here. */
-}
-
-
 static void
 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 			     hb_face_t *face,
@@ -437,10 +404,15 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 {
  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
  switch (syllable_type) {
-  case consonant_syllable:	initial_reordering_consonant_syllable  (plan, face, buffer, start, end); return;
-  case punctuation_cluster:	initial_reordering_punctuation_cluster (plan, face, buffer, start, end); return;
-  case broken_cluster:		initial_reordering_broken_cluster      (plan, face, buffer, start, end); return;
-  case non_myanmar_cluster:	initial_reordering_non_myanmar_cluster (plan, face, buffer, start, end); return;
+
+    case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
+    case consonant_syllable:
+      initial_reordering_consonant_syllable  (buffer, start, end);
+      break;
+
+    case punctuation_cluster:
+    case non_myanmar_cluster:
+      break;
  }
 }

@@ -505,18 +477,8 @@ initial_reordering (const hb_ot_shape_plan_t *plan,
 {
  insert_dotted_circles (plan, font, buffer);

-  hb_glyph_info_t *info = buffer->info;
-  unsigned int count = buffer->len;
-  if (unlikely (!count)) return;
-  unsigned int last = 0;
-  unsigned int last_syllable = info[0].syllable();
-  for (unsigned int i = 1; i < count; i++)
-    if (last_syllable != info[i].syllable()) {
-      initial_reordering_syllable (plan, font->face, buffer, last, i);
-      last = i;
-      last_syllable = info[last].syllable();
-    }
-  initial_reordering_syllable (plan, font->face, buffer, last, count);
+  foreach_syllable (buffer, start, end)
+    initial_reordering_syllable (plan, font->face, buffer, start, end);
 }

 static void

--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -59,9 +59,9 @@ enum hb_ot_shape_zero_width_marks_type_t {
  HB_COMPLEX_SHAPER_IMPLEMENT (myanmar_old) \
  HB_COMPLEX_SHAPER_IMPLEMENT (indic) \
  HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \
-  HB_COMPLEX_SHAPER_IMPLEMENT (sea) \
  HB_COMPLEX_SHAPER_IMPLEMENT (thai) \
  HB_COMPLEX_SHAPER_IMPLEMENT (tibetan) \
+  HB_COMPLEX_SHAPER_IMPLEMENT (use) \
  /* ^--- Add new shapers here */


@@ -217,61 +217,9 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)

    /* ^--- Add new shapers here */

-
 #if 0
-    /* Note:
-     *
-     * These disabled scripts are listed in ucd/IndicSyllabicCategory.txt, but according
-     * to Martin Hosken and Jonathan Kew do not require complex shaping.
-     *
-     * TODO We should automate figuring out which scripts do not need complex shaping
-     *
-     * TODO We currently keep data for these scripts in our indic table.  Need to fix the
-     * generator to not do that.
-     */
-
-
-    /* Simple? */
-
-    /* Unicode-3.2 additions */
-    case HB_SCRIPT_BUHID:
-    case HB_SCRIPT_HANUNOO:
-
-    /* Unicode-5.1 additions */
-    case HB_SCRIPT_SAURASHTRA:
-
-    /* Unicode-6.0 additions */
-    case HB_SCRIPT_BATAK:
-    case HB_SCRIPT_BRAHMI:
-
-
-    /* Simple */
-
-    /* Unicode-1.1 additions */
-    /* These have their own shaper now. */
-    case HB_SCRIPT_LAO:
-    case HB_SCRIPT_THAI:
-
-    /* Unicode-3.2 additions */
-    case HB_SCRIPT_TAGALOG:
-    case HB_SCRIPT_TAGBANWA:
-
-    /* Unicode-4.0 additions */
-    case HB_SCRIPT_LIMBU:
-    case HB_SCRIPT_TAI_LE:
-
    /* Unicode-4.1 additions */
-    case HB_SCRIPT_KHAROSHTHI:
    case HB_SCRIPT_NEW_TAI_LUE:
-    case HB_SCRIPT_SYLOTI_NAGRI:
-
-    /* Unicode-5.1 additions */
-    case HB_SCRIPT_KAYAH_LI:
-
-    /* Unicode-5.2 additions */
-    case HB_SCRIPT_TAI_VIET:
-
-
 #endif

    /* Unicode-1.1 additions */
@@ -288,28 +236,11 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
    /* Unicode-3.0 additions */
    case HB_SCRIPT_SINHALA:

-    /* Unicode-5.0 additions */
-    case HB_SCRIPT_BALINESE:
-
-    /* Unicode-5.1 additions */
-    case HB_SCRIPT_LEPCHA:
-    case HB_SCRIPT_REJANG:
-    case HB_SCRIPT_SUNDANESE:
-
    /* Unicode-5.2 additions */
    case HB_SCRIPT_JAVANESE:
-    case HB_SCRIPT_KAITHI:
-    case HB_SCRIPT_MEETEI_MAYEK:
-
-    /* Unicode-6.0 additions */
-
-    /* Unicode-6.1 additions */
-    case HB_SCRIPT_CHAKMA:
-    case HB_SCRIPT_SHARADA:
-    case HB_SCRIPT_TAKRI:

      /* If the designer designed the font for the 'DFLT' script,
-       * use the default shaper.  Otherwise, use the Indic shaper.
+       * use the default shaper.  Otherwise, use the specific shaper.
       * Note that for some simple scripts, there may not be *any*
       * GSUB/GPOS needed, so there may be no scripts found! */
      if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T'))
@@ -341,23 +272,82 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
      else
 	return &_hb_ot_complex_shaper_default;

+
+    /* Unicode-2.0 additions */
+    //case HB_SCRIPT_TIBETAN:
+
+    /* Unicode-3.0 additions */
+    //case HB_SCRIPT_MONGOLIAN:
+    //case HB_SCRIPT_SINHALA:
+
+    /* Unicode-3.2 additions */
+    case HB_SCRIPT_BUHID:
+    case HB_SCRIPT_HANUNOO:
+    case HB_SCRIPT_TAGALOG:
+    case HB_SCRIPT_TAGBANWA:
+
+    /* Unicode-4.0 additions */
+    case HB_SCRIPT_LIMBU:
+    case HB_SCRIPT_TAI_LE:
+
    /* Unicode-4.1 additions */
    case HB_SCRIPT_BUGINESE:
+    case HB_SCRIPT_KHAROSHTHI:
+    case HB_SCRIPT_SYLOTI_NAGRI:
+    case HB_SCRIPT_TIFINAGH:
+
+    /* Unicode-5.0 additions */
+    case HB_SCRIPT_BALINESE:
+    //case HB_SCRIPT_NKO:
+    //case HB_SCRIPT_PHAGS_PA:

    /* Unicode-5.1 additions */
    case HB_SCRIPT_CHAM:
+    case HB_SCRIPT_KAYAH_LI:
+    case HB_SCRIPT_LEPCHA:
+    case HB_SCRIPT_REJANG:
+    case HB_SCRIPT_SAURASHTRA:
+    case HB_SCRIPT_SUNDANESE:

    /* Unicode-5.2 additions */
+    case HB_SCRIPT_EGYPTIAN_HIEROGLYPHS:
+    //case HB_SCRIPT_JAVANESE:
+    case HB_SCRIPT_KAITHI:
+    case HB_SCRIPT_MEETEI_MAYEK:
    case HB_SCRIPT_TAI_THAM:
+    case HB_SCRIPT_TAI_VIET:
+
+    /* Unicode-6.0 additions */
+    case HB_SCRIPT_BATAK:
+    case HB_SCRIPT_BRAHMI:
+    //case HB_SCRIPT_MANDAIC:
+
+    /* Unicode-6.1 additions */
+    case HB_SCRIPT_CHAKMA:
+    case HB_SCRIPT_SHARADA:
+    case HB_SCRIPT_TAKRI:
+
+    /* Unicode-7.0 additions */
+    case HB_SCRIPT_DUPLOYAN:
+    case HB_SCRIPT_GRANTHA:
+    case HB_SCRIPT_KHOJKI:
+    case HB_SCRIPT_KHUDAWADI:
+    case HB_SCRIPT_MAHAJANI:
+    //case HB_SCRIPT_MANICHAEAN:
+    case HB_SCRIPT_MODI:
+    case HB_SCRIPT_PAHAWH_HMONG:
+    //case HB_SCRIPT_PSALTER_PAHLAVI:
+    case HB_SCRIPT_SIDDHAM:
+    case HB_SCRIPT_TIRHUTA:

      /* If the designer designed the font for the 'DFLT' script,
-       * use the default shaper.  Otherwise, use the Indic shaper.
+       * use the default shaper.  Otherwise, use the specific shaper.
       * Note that for some simple scripts, there may not be *any*
       * GSUB/GPOS needed, so there may be no scripts found! */
      if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T'))
 	return &_hb_ot_complex_shaper_default;
      else
-	return &_hb_ot_complex_shaper_sea;
+	return &_hb_ot_complex_shaper_use;
  }
 }


--- a/src/hb-ot-shape-complex-use-machine.rl
+++ b/src/hb-ot-shape-complex-use-machine.rl
+/*
+ * Copyright © 2015  Mozilla Foundation.
+ * Copyright © 2015  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+
+#include "hb-private.hh"
+
+%%{
+  machine use_syllable_machine;
+  alphtype unsigned char;
+  write data;
+}%%
+
+%%{
+
+# Symbolic names for the input alphabet of the machine.
+# Same order as enum use_category_t.  Not sure how to avoid duplication.
+# The numeric values have to stay identical to that enum, because the machine
+# is keyed directly on info[p].use_category() (see getkey in find_syllables).
+# Entries that are commented out are not referenced by any rule in this file.
+
+O	= 0; # OTHER
+
+B	= 1; # BASE
+IV	= 2; # BASE_VOWEL
+IND	= 3; # BASE_IND
+N	= 4; # BASE_NUM
+GB	= 5; # BASE_OTHER
+CGJ	= 6; # CGJ
+#F	= 7; # CONS_FINAL
+FM	= 8; # CONS_FINAL_MOD
+#M	= 9; # CONS_MED
+#CM	= 10; # CONS_MOD
+SUB	= 11; # CONS_SUB
+H	= 12; # HALANT
+
+HN	= 13; # HALANT_NUM
+ZWNJ	= 14; # Zero width non-joiner
+ZWJ	= 15; # Zero width joiner
+WJ	= 16; # Word joiner
+Rsv	= 17; # Reserved characters
+R	= 18; # REPHA
+S	= 19; # SYM
+#SM	= 20; # SYM_MOD
+VS	= 21; # VARIATION_SELECTOR
+#V	= 36; # VOWEL
+#VM	= 40; # VOWEL_MOD
+
+# Positional sub-categories.  Note that VPre (22) and VMPre (23) are listed
+# beside their siblings, not in numeric order.
+FAbv	= 24; # CONS_FINAL_ABOVE
+FBlw	= 25; # CONS_FINAL_BELOW
+FPst	= 26; # CONS_FINAL_POST
+MAbv	= 27; # CONS_MED_ABOVE
+MBlw	= 28; # CONS_MED_BELOW
+MPst	= 29; # CONS_MED_POST
+MPre	= 30; # CONS_MED_PRE
+CMAbv	= 31; # CONS_MOD_ABOVE
+CMBlw	= 32; # CONS_MOD_BELOW
+VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
+VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
+VPst	= 35; # VOWEL_POST	UIPC = Right
+VPre	= 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
+VMAbv	= 37; # VOWEL_MOD_ABOVE
+VMBlw	= 38; # VOWEL_MOD_BELOW
+VMPst	= 39; # VOWEL_MOD_POST
+VMPre	= 23; # VOWEL_MOD_PRE
+SMAbv	= 41; # SYM_MOD_ABOVE
+SMBlw	= 42; # SYM_MOD_BELOW
+
+
+# Building blocks shared by the cluster definitions below.  Each one is a
+# fixed-order run of optional marks, so every one of them can match nothing.
+
+# Above then below consonant modifiers, followed by any number of
+# "halant + base" or subjoined-consonant groups, each with its own optional
+# variation selector and modifiers.
+consonant_modifiers = CMAbv* CMBlw* ((H B | SUB) VS? CMAbv? CMBlw*)*;
+# At most one medial of each position, in the order pre, above, below, post.
+medial_consonants = MPre? MAbv? MBlw? MPst?;
+# Any number of dependent vowels, grouped in the order pre, above, below, post.
+dependent_vowels = VPre* VAbv* VBlw* VPst*;
+# Any number of vowel modifiers, grouped in the order pre, above, below, post.
+vowel_modifiers = VMPre* VMAbv* VMBlw* VMPst*;
+# Final consonants (above, below, post) and at most one final modifier.
+final_consonants = FAbv* FBlw* FPst* FM?;
+
+# A base (consonant, generic base or independent vowel) whose cluster ends
+# in an explicit halant; no medials, vowels or finals are accepted here.
+virama_terminated_cluster =
+	R? (B | GB | IV) VS?
+	consonant_modifiers
+	H
+;
+# Full cluster built on a consonant or generic base.
+consonant_cluster =
+	R? (B | GB) VS?
+	consonant_modifiers
+	medial_consonants
+	dependent_vowels
+	vowel_modifiers
+	final_consonants
+;
+# Cluster built on an independent vowel: same shape as consonant_cluster
+# except that dependent_vowels is not part of it.
+vowel_cluster =
+	R? (IV) VS?
+	consonant_modifiers
+	medial_consonants
+	vowel_modifiers
+	final_consonants
+;
+
+# The tail of a consonant_cluster with no base in front of it.
+# NOTE(review): every component here is optional, so this expression also
+# matches the empty sequence -- confirm that is intended for a scanner rule.
+broken_cluster =
+	R?
+	consonant_modifiers
+	medial_consonants
+	dependent_vowels
+	vowel_modifiers
+	final_consonants
+;
+
+# Numbers joined by HN; the first form additionally ends in a halant.
+number_joiner_terminated_cluster = N VS? (HN N VS?)* H;
+numeral_cluster = N VS? (HN N VS?)*;
+# A symbol with its above/below symbol modifiers.
+symbol_cluster = S VS? SMAbv* SMBlw*;
+# Single characters that form a cluster on their own.
+independent_cluster = (IND | O | Rsv | WJ) VS?;
+
+# Scanner: each alternative reports the span it matched through
+# found_syllable(), passing the cluster kind.
+# NOTE(review): no alternative can start with CGJ, HN, ZWNJ, ZWJ, a bare VS,
+# or an H that is not followed by B; confirm what the generated scanner does
+# when the input reaches one of those at a token boundary.
+main := |*
+	independent_cluster			=> { found_syllable (independent_cluster); };
+	virama_terminated_cluster		=> { found_syllable (virama_terminated_cluster); };
+	consonant_cluster			=> { found_syllable (consonant_cluster); };
+	vowel_cluster				=> { found_syllable (vowel_cluster); };
+	number_joiner_terminated_cluster	=> { found_syllable (number_joiner_terminated_cluster); };
+	numeral_cluster				=> { found_syllable (numeral_cluster); };
+	symbol_cluster				=> { found_syllable (symbol_cluster); };
+	broken_cluster				=> { found_syllable (broken_cluster); };
+*|;
+
+
+}%%
+
+/* Action body shared by every alternative of the scanner above.
+ *
+ * Stamps each glyph in [last, p] with (syllable_serial << 4) | syllable_type,
+ * i.e. a running serial above the low nibble and the cluster kind in the low
+ * nibble, then moves `last` just past the match and bumps the serial.  The
+ * serial starts at 1 in find_syllables() and wraps from 16 back to 1, so it
+ * never takes the value 0.
+ *
+ * `syllable_type` is used both as a value and, stringified, in the trace
+ * line.  `info`, `p`, `last` and `syllable_serial` must be in scope at the
+ * expansion site.  The `if (0) fprintf` statement is a disabled debug trace.
+ *
+ * NOTE(review): the wrap at 16 presumably keeps the packed value within an
+ * 8-bit syllable() field -- confirm against that field's declaration. */
+#define found_syllable(syllable_type) \
+  HB_STMT_START { \
+    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \
+    for (unsigned int i = last; i < p+1; i++) \
+      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
+    last = p+1; \
+    syllable_serial++; \
+    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+  } HB_STMT_END
+
+/* Runs use_syllable_machine over the whole buffer.
+ *
+ * The machine reads one key per glyph, info[p].use_category(), and each
+ * match is recorded into info[].syllable() by the found_syllable() action.
+ * Nothing is returned; the only output is those syllable() values. */
+static void
+find_syllables (hb_buffer_t *buffer)
+{
+  /* State variables referenced by the Ragel-generated code.  ts, te and act
+   * are tagged HB_UNUSED, presumably because the generated code does not
+   * always reference them. */
+  unsigned int p, pe, eof, ts HB_UNUSED, te HB_UNUSED, act HB_UNUSED;
+  int cs;
+  hb_glyph_info_t *info = buffer->info;
+  %%{
+    write init;
+    getkey info[p].use_category();
+  }%%
+
+  /* Scan from the first glyph to the end of the buffer; end of data is
+   * also end of input. */
+  p = 0;
+  pe = eof = buffer->len;
+
+  /* Consumed and updated by found_syllable(): start of the span not yet
+   * tagged, and the serial to give to the next syllable. */
+  unsigned int last = 0;
+  unsigned int syllable_serial = 1;
+  %%{
+    write exec;
+  }%%
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
--- a/src/hb-ot-shape-complex-use-private.hh
+++ b/src/hb-ot-shape-complex-use-private.hh
+/*
+ * Copyright © 2015  Mozilla Foundation.
+ * Copyright © 2015  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
+
+#include "hb-private.hh"
+
+
+#include "hb-ot-shape-complex-private.hh"
+
+
+#define USE_TABLE_ELEMENT_TYPE uint8_t
+
+/* Categories used in the Universal Shaping Engine spec:
+ * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
+ */
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication.
+ *
+ * The numeric values are not contiguous: 7, 9, 10, 20, 36 and 40 belong to
+ * the generic categories that are commented out below, and USE_VPre (22) and
+ * USE_VMPre (23) are listed beside their siblings rather than in numeric
+ * order.
+ *
+ * NOTE(review): values run up to 42, so many of them are >= 32.  The FLAG()
+ * helper in hb-private.hh statically asserts that its argument is below 32;
+ * confirm no caller builds a 32-bit flag mask out of the higher categories. */
+enum use_category_t {
+  USE_O		= 0,	/* OTHER */
+
+  USE_B		= 1,	/* BASE */
+  USE_IV	= 2,	/* BASE_VOWEL */
+  USE_IND	= 3,	/* BASE_IND */
+  USE_N		= 4,	/* BASE_NUM */
+  USE_GB	= 5,	/* BASE_OTHER */
+  USE_CGJ	= 6,	/* CGJ */
+//  USE_F		= 7,	/* CONS_FINAL */
+  USE_FM	= 8,	/* CONS_FINAL_MOD */
+//  USE_M		= 9,	/* CONS_MED */
+//  USE_CM	= 10,	/* CONS_MOD */
+  USE_SUB	= 11,	/* CONS_SUB */
+  USE_H		= 12,	/* HALANT */
+
+  USE_HN	= 13,	/* HALANT_NUM */
+  USE_ZWNJ	= 14,	/* Zero width non-joiner */
+  USE_ZWJ	= 15,	/* Zero width joiner */
+  USE_WJ	= 16,	/* Word joiner */
+  USE_Rsv	= 17,	/* Reserved characters */
+  USE_R		= 18,	/* REPHA */
+  USE_S		= 19,	/* SYM */
+//  USE_SM	= 20,	/* SYM_MOD */
+  USE_VS	= 21,	/* VARIATION_SELECTOR */
+//  USE_V	= 36,	/* VOWEL */
+//  USE_VM	= 40,	/* VOWEL_MOD */
+
+  USE_FAbv	= 24,	/* CONS_FINAL_ABOVE */
+  USE_FBlw	= 25,	/* CONS_FINAL_BELOW */
+  USE_FPst	= 26,	/* CONS_FINAL_POST */
+  USE_MAbv	= 27,	/* CONS_MED_ABOVE */
+  USE_MBlw	= 28,	/* CONS_MED_BELOW */
+  USE_MPst	= 29,	/* CONS_MED_POST */
+  USE_MPre	= 30,	/* CONS_MED_PRE */
+  USE_CMAbv	= 31,	/* CONS_MOD_ABOVE */
+  USE_CMBlw	= 32,	/* CONS_MOD_BELOW */
+  USE_VAbv	= 33,	/* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
+  USE_VBlw	= 34,	/* VOWEL_BELOW / VOWEL_BELOW_POST */
+  USE_VPst	= 35,	/* VOWEL_POST	UIPC = Right */
+  USE_VPre	= 22,	/* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
+  USE_VMAbv	= 37,	/* VOWEL_MOD_ABOVE */
+  USE_VMBlw	= 38,	/* VOWEL_MOD_BELOW */
+  USE_VMPst	= 39,	/* VOWEL_MOD_POST */
+  USE_VMPre	= 23,	/* VOWEL_MOD_PRE */
+  USE_SMAbv	= 41,	/* SYM_MOD_ABOVE */
+  USE_SMBlw	= 42	/* SYM_MOD_BELOW */
+};
+
+/* Library-internal lookup taking a Unicode code point and returning a
+ * USE_TABLE_ELEMENT_TYPE (uint8_t) value.
+ * NOTE(review): presumably the result is one of the use_category_t values
+ * above; the definition is not in this header -- confirm against the table
+ * source. */
+HB_INTERNAL USE_TABLE_ELEMENT_TYPE
+hb_use_get_categories (hb_codepoint_t u);
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH */
--- a/src/hb-ot-shape-complex-use-table.cc
+++ b/src/hb-ot-shape-complex-use-table.cc
--- a/src/hb-ot-shape-complex-sea.cc
+++ b/src/hb-ot-shape-complex-sea.cc
--- a/src/hb-private.hh
+++ b/src/hb-private.hh
@@ -193,7 +193,8 @@ static inline unsigned int ARRAY_LENGTH (const Type (&)[n]) { return n; }
 #define _ASSERT_STATIC0(_line, _cond)	_ASSERT_STATIC1 (_line, (_cond))
 #define ASSERT_STATIC(_cond)		_ASSERT_STATIC0 (__LINE__, (_cond))

-#define ASSERT_STATIC_EXPR(_cond)((void) sizeof (char[(_cond) ? 1 : -1]))
+/* Note: C++ allows sizeof() of variable-length arrays.  So, if _cond is not
+ * constant, it still compiles (ouch!), but at least we'll get a -Wvla warning. */
 #define ASSERT_STATIC_EXPR_ZERO(_cond) (0 * sizeof (char[(_cond) ? 1 : -1]))

 #define _PASTE1(a,b) a##b
@@ -845,9 +846,11 @@ hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2, T lo3, T hi3)

 /* Useful for set-operations on small enums.
 * For example, for testing "x ∈ {x1, x2, x3}" use:
- * (FLAG(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3)))
+ * (FLAG_SAFE(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3)))
 */
-#define FLAG(x) (1<<(x))
+#define FLAG(x) (ASSERT_STATIC_EXPR_ZERO ((x) < 32) + (1U << (x)))
+#define FLAG_SAFE(x) (1U << (x))
+#define FLAG_UNSAFE(x) ((x) < 32 ? FLAG_SAFE(x) : 0)
 #define FLAG_RANGE(x,y) (ASSERT_STATIC_EXPR_ZERO ((x) < (y)) + FLAG(y+1) - FLAG(x))



--- a/src/hb-unicode-private.hh
+++ b/src/hb-unicode-private.hh
@@ -308,7 +308,7 @@ extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
 /* Misc */

 #define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \
-	(FLAG (gen_cat) & \
+	(FLAG_SAFE (gen_cat) & \
 	 (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
 	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
 	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))

--- a/test/shaping/texts/in-tree/MANIFEST
+++ b/test/shaping/texts/in-tree/MANIFEST
@@ -4,6 +4,6 @@ shaper-hangul
 shaper-hebrew
 shaper-indic
 shaper-myanmar
-shaper-sea
 shaper-thai
 shaper-tibetan
+shaper-use
--- a/test/shaping/texts/in-tree/shaper-sea/script-cham/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-sea/script-cham/MANIFEST
-misc
--- a/test/shaping/texts/in-tree/shaper-sea/script-new-tai-lue/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-sea/script-new-tai-lue/MANIFEST
-misc
--- a/test/shaping/texts/in-tree/shaper-sea/script-new-tai-lue/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-sea/script-new-tai-lue/misc/misc.txt
-ᦀᦷᧃᧈ
--- a/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/MANIFEST
-misc
--- a/test/shaping/texts/in-tree/shaper-sea/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-sea/MANIFEST
+script-batak
+script-buginese
 script-cham
-script-new-tai-lue
+script-kharoshti
 script-tai-tham
--- a/test/shaping/texts/in-tree/shaper-sea/script-cham/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-sea/script-cham/misc/MANIFEST
--- a/test/shaping/texts/in-tree/shaper-use/script-batak/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-use/script-batak/misc.txt
+ᯂᯩ
+ᯄ᯦ᯩ
+ᯇᯪᯰ
+ᯓᯩᯰ
+ᯄᯮ
+ᯃᯮ
+ᯎᯮ
+ᯞᯮ
+ᯖᯪᯇ᯲
--- a/test/shaping/texts/in-tree/shaper-sea/script-new-tai-lue/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-sea/script-new-tai-lue/misc/MANIFEST
--- a/test/shaping/texts/in-tree/shaper-use/script-buginese/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-use/script-buginese/misc.txt
+ᨒᨚᨈᨑ
+ᨔᨑ
+ᨅᨔ ᨈᨚ ᨅᨙᨀ
+ᨕᨒᨚ ᨆᨒᨗᨕᨘ ᨅᨛᨈᨘᨕᨊ
+ᨕᨗᨉᨚ ᨔᨘᨑᨛ
+ᨕᨗᨊ ᨔᨘᨑᨛ
+ᨕᨊ ᨔᨘᨑᨛ
+
+ᨊᨀᨚ	ᨕᨛᨃ	ᨈᨕᨘᨄᨔᨒ᨞	ᨕᨍ	ᨆᨘᨄᨈᨒᨒᨚᨓᨗ	ᨄᨌᨒᨆᨘ	ᨑᨗᨈᨚᨄᨔᨒᨕᨙ᨞
+ᨄᨔᨗᨈᨘᨍᨘᨓᨗᨆᨘᨈᨚᨓᨗᨔ	ᨕᨔᨒᨊ	ᨄᨌᨒᨆᨘ᨞	ᨕᨄ	ᨕᨗᨀᨚᨊᨈᨘ	ᨊᨁᨗᨒᨗ	ᨉᨙᨓᨈᨕᨙ᨞
+ᨊᨀᨚ	ᨅᨕᨗᨌᨘᨆᨘᨄᨗ	ᨕᨔᨒᨊ	ᨈᨕᨘᨓᨙ᨞	ᨆᨘᨄᨙᨑᨍᨕᨗᨔ	ᨄᨉᨈᨚᨓᨗ᨞
+ᨊᨀᨚ	ᨄᨔᨒᨕᨗ	ᨈᨕᨘᨓᨙ᨞	ᨕᨍ	ᨈᨗᨆᨘᨌᨒᨕᨗ	ᨑᨗᨔᨗᨈᨗᨊᨍᨊᨕᨙᨈᨚᨔ	ᨕᨔᨒᨊ᨞
+
+ᨕᨛᨛᨃ	ᨕᨛᨃ	ᨄ ᨙᨑ᨞	ᨕᨛᨃ	 ᨙᨔᨕᨘᨓ	ᨓᨛᨈᨘ᨞
+ᨕᨛᨃ	 ᨙᨔᨕᨘᨓ	ᨕᨑᨘ	ᨆᨀᨘᨋᨕᨗ	ᨑᨗ	ᨒᨘᨓᨘ᨞	ᨆᨔᨒ	ᨕᨘᨒᨗ᨞
+
+ᨄᨘᨑᨊᨗᨀᨚ	ᨆᨙᨋ?
+ᨉᨙᨄ
+
+ᨆᨙᨒᨚ ᨀ ᨌᨛᨙᨆ
+ᨔᨙᨉᨗ	
+ᨉᨘᨓ	
+ᨈᨛᨒᨘ	
+ᨕᨛᨄ	
+ᨒᨗᨆ	
+ᨕᨛᨊᨛ	
+ᨄᨗᨈᨘ	
+ᨕᨑᨘᨓ	
+ᨕᨙᨔᨑ	
+ᨔᨄᨘᨒᨚ	
+ᨉᨘᨓᨄᨘᨒᨚ	
+ᨈᨛᨒᨘᨄᨘᨒᨚ	
+ᨄᨈᨄᨘᨒᨚ	
+ᨒᨗᨆᨄᨘᨒᨚ	
+ᨕᨛᨊᨛᨄᨘᨒᨚᨊ	
+ᨄᨗᨈᨘᨄᨘᨒᨚ	
+ᨕᨑᨘᨓᨄᨘᨒᨚᨊ	
+ᨕᨙᨔᨑᨄᨘᨒᨚᨊ	
+ᨔᨗᨑᨈᨘ	
+ᨔᨗᨔᨛᨅᨘ	
+ᨔᨗᨒᨔ	
+ᨔᨗᨀᨚᨈᨗ	
+
+ᨅᨔ ᨕᨘᨁᨗ
+
+ᨅᨔ ᨆᨀᨔᨑ
+ᨅᨒ	
+ᨅᨚᨒᨚ	
+ᨅᨅ	
+ᨌᨗᨄᨘᨑᨘ	
+ᨉᨚᨕᨙ	
+ᨕᨗᨐᨚ	
+ᨒᨚᨄᨚ	
+ᨔᨒᨚ	
+ᨈ ᨅᨙᨙ	
+ᨈᨙᨊ	
+ᨀᨑᨕᨙ	
+ᨕᨄ ᨀᨑᨙᨅ?	
+ᨒᨀᨙᨀᨚ ᨆᨕᨙ?	
+ᨅᨒ	
+ᨅᨚᨈᨚ	
+ᨑᨈᨔ	
+ᨅᨈᨒ	
+ᨅᨗᨒ	
+ᨁᨙᨒᨙ ᨁᨙᨒᨙ	
+ᨀᨚᨀᨚ	
+ᨍᨑ	
+ᨅᨙᨅᨙ	
+ᨆᨚᨈᨙᨑᨙ	
+ᨂᨑᨙ	
--- a/test/shaping/texts/in-tree/shaper-use/script-cham/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-use/script-cham/MANIFEST
+misc.txt
--- a/test/shaping/texts/in-tree/shaper-sea/script-cham/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-sea/script-cham/misc/misc.txt
--- a/test/shaping/texts/in-tree/shaper-use/script-kharoshti/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-use/script-kharoshti/MANIFEST
+misc.txt
--- a/test/shaping/texts/in-tree/shaper-use/script-kharoshti/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-use/script-kharoshti/misc.txt
+𐨤𐨪𐨌𐨪𐨿𐨗𐨸𐨅𐨌𐨏
+𐨀𐨁
+𐨐𐨁
+𐨠𐨁
+𐨀𐨂
+𐨱𐨂
+𐨨𐨂
+𐨀𐨃
+𐨨𐨃
+𐨀𐨅
+𐨐𐨅
+𐨠𐨅
+𐨡𐨅
+𐨀𐨆
+𐨤𐨆
+𐨨𐨌
+𐨯𐨍
+𐨀𐨎
+𐨐𐨏
+𐨗𐨸
+𐨒𐨹
+𐨨𐨺
+𐨢𐨁𐨐𐨿
+𐨐𐨿𐨮
+𐨨𐨿𐨪
+𐨬𐨿𐨱
+𐨯𐨿𐨟
+𐨯𐨿𐨩
+𐨪𐨿𐨟
+𐨟𐨿𐨪
+𐨫𐨿𐨤
+𐨤𐨿𐨫
+𐨐𐨿𐨫
+𐨟𐨿𐨬
+𐨐𐨿𐨟
+𐨑𐨿𐨐𐨿𐨮
--- a/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/misc/MANIFEST
--- a/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/misc/misc.txt
+++ b/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/misc/misc.txt
--- a/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/misc/torture.txt
+++ b/test/shaping/texts/in-tree/shaper-sea/script-tai-tham/misc/torture.txt