提交 ec8e635e 编写于 作者: B Behdad Esfahbod

[ucd] Use custom encoding to shrink composition data

Saves another 2.5kb.

Part of https://github.com/harfbuzz/harfbuzz/issues/1652
上级 9c933aca
......@@ -38,13 +38,22 @@ assert all((v[0] >> 16) in (0,2) for v in dm1)
dm1_p0_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 0]
dm1_p2_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 2]
dm1_order = {v:i+1 for i,v in enumerate(dm1)}
dm2 = sorted((v, i) for i,v in dm.items() if len(v) == 2)
dm2 = [("HB_CODEPOINT_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" %
(v+(i if i not in ce and not ccc[i] else 0,)), v)
for v,i in dm2]
dm2_array = [s for s,v in dm2]
dm2 = sorted((v+(i if i not in ce and not ccc[i] else 0,), v)
for i,v in dm.items() if len(v) == 2)
filt = lambda v: ((v[0] & 0xFFFFF800) == 0x0000 and
(v[1] & 0xFFFFFF80) == 0x0300 and
(v[2] & 0xFFF0C000) == 0x0000)
dm2_u32_array = [v for v in dm2 if filt(v[0])]
dm2_u64_array = [v for v in dm2 if not filt(v[0])]
assert dm2_u32_array + dm2_u64_array == dm2
dm2_u32_array = ["HB_CODEPOINT_ENCODE3_11_7_14 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % v[0] for v in dm2_u32_array]
dm2_u64_array = ["HB_CODEPOINT_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % v[0] for v in dm2_u64_array]
l = 1 + len(dm1_p0_array) + len(dm1_p2_array)
dm2_order = {v[1]:i+l for i,v in enumerate(dm2)}
dm_order = {None: 0}
dm_order.update(dm1_order)
dm_order.update(dm2_order)
......@@ -96,7 +105,8 @@ code = packTab.Code('_hb_ucd')
sc_array, _ = code.addArray('hb_script_t', 'sc_map', sc_array)
dm1_p0_array, _ = code.addArray('uint16_t', 'dm1_p0_map', dm1_p0_array)
dm1_p2_array, _ = code.addArray('uint16_t', 'dm1_p2_map', dm1_p2_array)
dm2_array, _ = code.addArray('uint64_t', 'dm2_map', dm2_array)
dm2_u32_array, _ = code.addArray('uint32_t', 'dm2_u32_map', dm2_u32_array)
dm2_u64_array, _ = code.addArray('uint64_t', 'dm2_u64_map', dm2_u64_array)
code.print_c(linkage='static inline')
datasets = [
......
......@@ -41,6 +41,11 @@
#define HB_CODEPOINT_DECODE3_2(v) ((hb_codepoint_t) ((v) >> 21) & 0x1FFFFFu)
#define HB_CODEPOINT_DECODE3_3(v) ((hb_codepoint_t) (v) & 0x1FFFFFu)
/* Custom encoding used by hb-ucd. */
#define HB_CODEPOINT_ENCODE3_11_7_14(x,y,z) (((uint32_t) ((x) & 0x07FFu) << 21) | (((uint32_t) (y) & 0x007Fu) << 14) | (uint32_t) ((z) & 0x3FFFu))
#define HB_CODEPOINT_DECODE3_11_7_14_1(v) ((hb_codepoint_t) ((v) >> 21))
#define HB_CODEPOINT_DECODE3_11_7_14_2(v) ((hb_codepoint_t) (((v) >> 14) & 0x007Fu) | 0x0300)
#define HB_CODEPOINT_DECODE3_11_7_14_3(v) ((hb_codepoint_t) (v) & 0x3FFFu)
struct
{
......
此差异已折叠。
......@@ -114,6 +114,14 @@ _cmp_pair (const void *_key, const void *_item)
return a < b ? -1 : a > b ? +1 : 0;
}
static int
_cmp_pair_11_7_14 (const void *_key, const void *_item)
{
uint32_t& a = * (uint32_t*) _key;
uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
return a < b ? -1 : a > b ? +1 : 0;
}
static hb_bool_t
hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
......@@ -122,16 +130,30 @@ hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
{
if (_hb_ucd_compose_hangul (a, b, ab)) return true;
uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
uint64_t *v = (uint64_t*) hb_bsearch (&k, _hb_ucd_dm2_map,
ARRAY_LENGTH (_hb_ucd_dm2_map),
sizeof (*_hb_ucd_dm2_map),
_cmp_pair);
if (likely (!v)) return false;
hb_codepoint_t u = 0;
hb_codepoint_t u = HB_CODEPOINT_DECODE3_3 (*v);
if (unlikely (!u)) return false;
if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u)
{
uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
uint32_t *v = (uint32_t*) hb_bsearch (&k, _hb_ucd_dm2_u32_map,
ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
sizeof (*_hb_ucd_dm2_u32_map),
_cmp_pair_11_7_14);
if (likely (!v)) return false;
u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v);
}
else
{
uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
uint64_t *v = (uint64_t*) hb_bsearch (&k, _hb_ucd_dm2_u64_map,
ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
sizeof (*_hb_ucd_dm2_u64_map),
_cmp_pair);
if (likely (!v)) return false;
u = HB_CODEPOINT_DECODE3_3 (*v);
}
if (unlikely (!u)) return false;
*ab = u;
return true;
}
......@@ -162,7 +184,16 @@ hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
}
i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
uint64_t v = _hb_ucd_dm2_map[i];
if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
{
uint32_t v = _hb_ucd_dm2_u32_map[i];
*a = HB_CODEPOINT_DECODE3_11_7_14_1 (v);
*b = HB_CODEPOINT_DECODE3_11_7_14_2 (v);
return true;
}
i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);
uint64_t v = _hb_ucd_dm2_u64_map[i];
*a = HB_CODEPOINT_DECODE3_1 (v);
*b = HB_CODEPOINT_DECODE3_2 (v);
return true;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册