提交 14d78411 编写于 作者: B Behdad Esfahbod

Update Arabic joining table to include Mandaic

Mandaic was added in Unicode 6.0, but the joining data was not updated then.
The draft ArabicShaping.txt for Unicode 6.1 includes the joining data for Mandaic.
Use that.
上级 43bf2f7f
...@@ -11,11 +11,23 @@ for line in sys.stdin: ...@@ -11,11 +11,23 @@ for line in sys.stdin:
fields = [x.strip() for x in line.split(';')] fields = [x.strip() for x in line.split(';')]
u = int(fields[0], 16) u = int(fields[0], 16)
if u < 0x0600 or (u > 0x07FF and u != 0x200C and u != 0x200D): if u == 0x200C or u == 0x200D:
continue
if u < 0x0600:
raise Exception ("Ooops, unexpected unicode character: ", fields) raise Exception ("Ooops, unexpected unicode character: ", fields)
dic[u] = fields dic[u] = fields
print " /*" v = dic.keys()
v.sort()
min_u, max_u = v[0], v[-1]
occupancy = len(v) * 100 / (max_u - min_u + 1)
# Maintain at least 40% occupancy in the table
if occupancy < 40:
raise Exception ("Table too sparse, please investigate: ", occupancy)
print "/* == Start of generated table == */"
print "/*"
print " * The following table is generated by running:" print " * The following table is generated by running:"
print " *" print " *"
print " * ./gen-arabic-joining-table.py < ArabicShaping.txt" print " * ./gen-arabic-joining-table.py < ArabicShaping.txt"
...@@ -25,8 +37,13 @@ print " *" ...@@ -25,8 +37,13 @@ print " *"
for line in header: for line in header:
print " * %s" % (line.strip()) print " * %s" % (line.strip())
print " */" print " */"
print " /* == Start of generated table == */"
for i in range(0x0600, 0x0800): print "#define JOINING_TABLE_FIRST 0x%04x" % min_u
print "#define JOINING_TABLE_LAST 0x%04x" % max_u
print "static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] ="
print "{"
for i in range(min_u, max_u + 1):
if i not in dic: if i not in dic:
print " JOINING_TYPE_X, /* %04X */" % i print " JOINING_TYPE_X, /* %04X */" % i
else: else:
...@@ -36,4 +53,6 @@ for i in range(0x0600, 0x0800): ...@@ -36,4 +53,6 @@ for i in range(0x0600, 0x0800):
else: else:
value = "JOINING_TYPE_" + entry[2] value = "JOINING_TYPE_" + entry[2]
print " %s, /* %s */" % (value, '; '.join(entry)) print " %s, /* %s */" % (value, '; '.join(entry))
print " /* == End of generated table == */" print " JOINING_TYPE_X /* dummy */"
print "};"
print "/* == End of generated table == */"
...@@ -56,23 +56,21 @@ enum { ...@@ -56,23 +56,21 @@ enum {
*/ */
/* == Start of generated table == */
/* /*
* Main joining-type table, covering U+0600..U+07FF.
* Includes Arabic, Syriac, and N'ko.
*/
static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] =
{
/*
* The following table is generated by running: * The following table is generated by running:
* *
* ./gen-arabic-joining-table.py < ArabicShaping.txt * ./gen-arabic-joining-table.py < ArabicShaping.txt
* *
* on the ArabicShaping.txt file with the header: * on the ArabicShaping.txt file with the header:
* *
* # ArabicShaping-6.0.0.txt * # ArabicShaping-6.1.0.txt
* # Date: 2010-04-30, 13:47:00 PDT [KW] * # Date: 2010-11-09, 12:10:00 PST [KW]
*/ */
/* == Start of generated table == */ #define JOINING_TABLE_FIRST 0x0600
#define JOINING_TABLE_LAST 0x0858
static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
{
JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */ JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */
JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */ JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */
JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */ JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */
...@@ -585,16 +583,105 @@ static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] = ...@@ -585,16 +583,105 @@ static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] =
JOINING_TYPE_X, /* 07FD */ JOINING_TYPE_X, /* 07FD */
JOINING_TYPE_X, /* 07FE */ JOINING_TYPE_X, /* 07FE */
JOINING_TYPE_X, /* 07FF */ JOINING_TYPE_X, /* 07FF */
/* == End of generated table == */ JOINING_TYPE_X, /* 0800 */
JOINING_TYPE_X JOINING_TYPE_X, /* 0801 */
JOINING_TYPE_X, /* 0802 */
JOINING_TYPE_X, /* 0803 */
JOINING_TYPE_X, /* 0804 */
JOINING_TYPE_X, /* 0805 */
JOINING_TYPE_X, /* 0806 */
JOINING_TYPE_X, /* 0807 */
JOINING_TYPE_X, /* 0808 */
JOINING_TYPE_X, /* 0809 */
JOINING_TYPE_X, /* 080A */
JOINING_TYPE_X, /* 080B */
JOINING_TYPE_X, /* 080C */
JOINING_TYPE_X, /* 080D */
JOINING_TYPE_X, /* 080E */
JOINING_TYPE_X, /* 080F */
JOINING_TYPE_X, /* 0810 */
JOINING_TYPE_X, /* 0811 */
JOINING_TYPE_X, /* 0812 */
JOINING_TYPE_X, /* 0813 */
JOINING_TYPE_X, /* 0814 */
JOINING_TYPE_X, /* 0815 */
JOINING_TYPE_X, /* 0816 */
JOINING_TYPE_X, /* 0817 */
JOINING_TYPE_X, /* 0818 */
JOINING_TYPE_X, /* 0819 */
JOINING_TYPE_X, /* 081A */
JOINING_TYPE_X, /* 081B */
JOINING_TYPE_X, /* 081C */
JOINING_TYPE_X, /* 081D */
JOINING_TYPE_X, /* 081E */
JOINING_TYPE_X, /* 081F */
JOINING_TYPE_X, /* 0820 */
JOINING_TYPE_X, /* 0821 */
JOINING_TYPE_X, /* 0822 */
JOINING_TYPE_X, /* 0823 */
JOINING_TYPE_X, /* 0824 */
JOINING_TYPE_X, /* 0825 */
JOINING_TYPE_X, /* 0826 */
JOINING_TYPE_X, /* 0827 */
JOINING_TYPE_X, /* 0828 */
JOINING_TYPE_X, /* 0829 */
JOINING_TYPE_X, /* 082A */
JOINING_TYPE_X, /* 082B */
JOINING_TYPE_X, /* 082C */
JOINING_TYPE_X, /* 082D */
JOINING_TYPE_X, /* 082E */
JOINING_TYPE_X, /* 082F */
JOINING_TYPE_X, /* 0830 */
JOINING_TYPE_X, /* 0831 */
JOINING_TYPE_X, /* 0832 */
JOINING_TYPE_X, /* 0833 */
JOINING_TYPE_X, /* 0834 */
JOINING_TYPE_X, /* 0835 */
JOINING_TYPE_X, /* 0836 */
JOINING_TYPE_X, /* 0837 */
JOINING_TYPE_X, /* 0838 */
JOINING_TYPE_X, /* 0839 */
JOINING_TYPE_X, /* 083A */
JOINING_TYPE_X, /* 083B */
JOINING_TYPE_X, /* 083C */
JOINING_TYPE_X, /* 083D */
JOINING_TYPE_X, /* 083E */
JOINING_TYPE_X, /* 083F */
JOINING_TYPE_R, /* 0840; MANDAIC HALQA; R; No_Joining_Group */
JOINING_TYPE_D, /* 0841; MANDAIC AB; D; No_Joining_Group */
JOINING_TYPE_D, /* 0842; MANDAIC AG; D; No_Joining_Group */
JOINING_TYPE_D, /* 0843; MANDAIC AD; D; No_Joining_Group */
JOINING_TYPE_D, /* 0844; MANDAIC AH; D; No_Joining_Group */
JOINING_TYPE_D, /* 0845; MANDAIC USHENNA; D; No_Joining_Group */
JOINING_TYPE_R, /* 0846; MANDAIC AZ; R; No_Joining_Group */
JOINING_TYPE_D, /* 0847; MANDAIC IT; D; No_Joining_Group */
JOINING_TYPE_D, /* 0848; MANDAIC ATT; D; No_Joining_Group */
JOINING_TYPE_R, /* 0849; MANDAIC AKSA; R; No_Joining_Group */
JOINING_TYPE_D, /* 084A; MANDAIC AK; D; No_Joining_Group */
JOINING_TYPE_D, /* 084B; MANDAIC AL; D; No_Joining_Group */
JOINING_TYPE_D, /* 084C; MANDAIC AM; D; No_Joining_Group */
JOINING_TYPE_D, /* 084D; MANDAIC AN; D; No_Joining_Group */
JOINING_TYPE_D, /* 084E; MANDAIC AS; D; No_Joining_Group */
JOINING_TYPE_R, /* 084F; MANDAIC IN; R; No_Joining_Group */
JOINING_TYPE_D, /* 0850; MANDAIC AP; D; No_Joining_Group */
JOINING_TYPE_D, /* 0851; MANDAIC ASZ; D; No_Joining_Group */
JOINING_TYPE_D, /* 0852; MANDAIC AQ; D; No_Joining_Group */
JOINING_TYPE_D, /* 0853; MANDAIC AR; D; No_Joining_Group */
JOINING_TYPE_R, /* 0854; MANDAIC ASH; R; No_Joining_Group */
JOINING_TYPE_D, /* 0855; MANDAIC AT; D; No_Joining_Group */
JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */
JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */
JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */
JOINING_TYPE_X /* dummy */
}; };
/* == End of generated table == */
static unsigned int get_joining_type (hb_codepoint_t u, hb_category_t gen_cat) static unsigned int get_joining_type (hb_codepoint_t u, hb_category_t gen_cat)
{ {
/* TODO Macroize the magic bit operations */ /* TODO Macroize the magic bit operations */
if (likely ((u & ~(0x0600^0x07FF)) == 0x0600)) { if (likely (JOINING_TABLE_FIRST <= u && u <= JOINING_TABLE_LAST)) {
unsigned int j_type = arabic_syriac_nko_joining_types[u - 0x0600]; unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
if (likely (j_type != JOINING_TYPE_X)) if (likely (j_type != JOINING_TYPE_X))
return j_type; return j_type;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册