From c4641723fbf6532b2e80a662e15573b31276bc73 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 7 Jul 2011 23:47:19 -0400 Subject: [PATCH] [API] Add compose() and decompose() unicode funcs, rename other ones Add compose() and decompose() unicode funcs. These implement pair-wise canonical composition/decomposition. The glib/icu implementations are lacking for now. We are adding API for this to glib, but I cannot find any useful API in ICU. May end up implementing these in-house. Changed all unicode_funcs callback names to remove the "_get" part. Eg, hb_unicode_get_script_func_t is now hb_unicode_script_func_t, and hb_unicode_get_script() is hb_unicode_script() now. --- TODO | 4 +- src/hb-glib.cc | 6 ++- src/hb-icu.cc | 6 ++- src/hb-ot-shape.cc | 6 +-- src/hb-shape.cc | 2 +- src/hb-unicode-private.hh | 8 ++-- src/hb-unicode.cc | 85 ++++++++++++++++++++++++++++----------- src/hb-unicode.h | 70 +++++++++++++++++++++++--------- test/test-unicode.c | 21 +++++----- 9 files changed, 146 insertions(+), 62 deletions(-) diff --git a/TODO b/TODO index c5fe068b..e15e295f 100644 --- a/TODO +++ b/TODO @@ -68,7 +68,9 @@ Tests to write: - ot-layout enumeration API (needs font) -- Finish test-shape.c +- Finish test-shape.c, grep for TODO + +- Finish test-unicode.c, grep for TODO Optimizations: diff --git a/src/hb-glib.cc b/src/hb-glib.cc index 109b9ba7..de909729 100644 --- a/src/hb-glib.cc +++ b/src/hb-glib.cc @@ -231,7 +231,11 @@ hb_unicode_funcs_t _hb_glib_unicode_funcs = { hb_glib_get_eastasian_width, hb_glib_get_general_category, hb_glib_get_mirroring, - hb_glib_get_script + hb_glib_get_script, + /* TODO + hb_glib_compose, + hb_glib_decompose, + */ } }; diff --git a/src/hb-icu.cc b/src/hb-icu.cc index 1e0134cb..4797cc5b 100644 --- a/src/hb-icu.cc +++ b/src/hb-icu.cc @@ -174,7 +174,11 @@ hb_unicode_funcs_t _hb_icu_unicode_funcs = { hb_icu_get_eastasian_width, hb_icu_get_general_category, hb_icu_get_mirroring, - hb_icu_get_script + hb_icu_get_script, + /* TODO + 
hb_icu_compose, + hb_icu_decompose, + */ } }; diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index 248f2bd4..8378e813 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -197,8 +197,8 @@ hb_set_unicode_props (hb_ot_shape_context_t *c) unsigned int count = c->buffer->len; for (unsigned int i = 1; i < count; i++) { - info[i].general_category() = hb_unicode_get_general_category (unicode, info[i].codepoint); - info[i].combining_class() = hb_unicode_get_combining_class (unicode, info[i].codepoint); + info[i].general_category() = hb_unicode_general_category (unicode, info[i].codepoint); + info[i].combining_class() = hb_unicode_combining_class (unicode, info[i].codepoint); } } @@ -252,7 +252,7 @@ hb_mirror_chars (hb_ot_shape_context_t *c) unsigned int count = c->buffer->len; for (unsigned int i = 0; i < count; i++) { - hb_codepoint_t codepoint = hb_unicode_get_mirroring (unicode, c->buffer->info[i].codepoint); + hb_codepoint_t codepoint = hb_unicode_mirroring (unicode, c->buffer->info[i].codepoint); if (likely (codepoint == c->buffer->info[i].codepoint)) c->buffer->info[i].mask |= rtlm_mask; /* XXX this should be moved to before setting user-feature masks */ else diff --git a/src/hb-shape.cc b/src/hb-shape.cc index 5db6cfe0..1ff830a7 100644 --- a/src/hb-shape.cc +++ b/src/hb-shape.cc @@ -63,7 +63,7 @@ hb_shape (hb_font_t *font, hb_unicode_funcs_t *unicode = buffer->unicode; unsigned int count = buffer->len; for (unsigned int i = 0; i < count; i++) { - hb_script_t script = hb_unicode_get_script (unicode, buffer->info[i].codepoint); + hb_script_t script = hb_unicode_script (unicode, buffer->info[i].codepoint); if (likely (script != HB_SCRIPT_COMMON && script != HB_SCRIPT_INHERITED && script != HB_SCRIPT_UNKNOWN)) { diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh index 55b94a75..2b0ec99d 100644 --- a/src/hb-unicode-private.hh +++ b/src/hb-unicode-private.hh @@ -49,6 +49,8 @@ HB_BEGIN_DECLS HB_UNICODE_FUNC_IMPLEMENT (general_category) \ 
HB_UNICODE_FUNC_IMPLEMENT (mirroring) \ HB_UNICODE_FUNC_IMPLEMENT (script) \ + HB_UNICODE_FUNC_IMPLEMENT (compose) \ + HB_UNICODE_FUNC_IMPLEMENT (decompose) \ /* ^--- Add new callbacks here */ /* Simple callbacks are those taking a hb_codepoint_t and returning a hb_codepoint_t */ @@ -67,13 +69,13 @@ struct _hb_unicode_funcs_t { bool immutable; - /* Don't access these directly. Call hb_unicode_get_*() instead. */ + /* Don't access these directly. Call hb_unicode_*() instead. */ struct { -#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_get_##name##_func_t name; +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name; HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS #undef HB_UNICODE_FUNC_IMPLEMENT - } get; + } func; struct { #define HB_UNICODE_FUNC_IMPLEMENT(name) void *name; diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index ba0004b4..e2043c22 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -40,45 +40,67 @@ HB_BEGIN_DECLS */ static unsigned int -hb_unicode_get_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, - hb_codepoint_t unicode HB_UNUSED, - void *user_data HB_UNUSED) +hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) { return 0; } static unsigned int -hb_unicode_get_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, - hb_codepoint_t unicode HB_UNUSED, - void *user_data HB_UNUSED) +hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) { return 1; } static hb_unicode_general_category_t -hb_unicode_get_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, - hb_codepoint_t unicode HB_UNUSED, - void *user_data HB_UNUSED) +hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) { return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; } static hb_codepoint_t 
-hb_unicode_get_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, - hb_codepoint_t unicode HB_UNUSED, - void *user_data HB_UNUSED) +hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) { return unicode; } static hb_script_t -hb_unicode_get_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, - hb_codepoint_t unicode HB_UNUSED, - void *user_data HB_UNUSED) +hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) { return HB_SCRIPT_UNKNOWN; } +static hb_bool_t +hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t a HB_UNUSED, + hb_codepoint_t b HB_UNUSED, + hb_codepoint_t *ab HB_UNUSED, + void *user_data HB_UNUSED) +{ + /* TODO handle Hangul jamo here? */ + return FALSE; +} + +static hb_bool_t +hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t ab HB_UNUSED, + hb_codepoint_t *a HB_UNUSED, + hb_codepoint_t *b HB_UNUSED, + void *user_data HB_UNUSED) +{ + /* TODO handle Hangul jamo here? */ + return FALSE; +} + hb_unicode_funcs_t _hb_unicode_funcs_nil = { HB_OBJECT_HEADER_STATIC, @@ -86,7 +108,7 @@ hb_unicode_funcs_t _hb_unicode_funcs_nil = { NULL, /* parent */ TRUE, /* immutable */ { -#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_get_##name##_nil, +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil, HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS #undef HB_UNICODE_FUNC_IMPLEMENT } @@ -113,7 +135,7 @@ hb_unicode_funcs_create (hb_unicode_funcs_t *parent) hb_unicode_funcs_make_immutable (parent); ufuncs->parent = hb_unicode_funcs_reference (parent); - ufuncs->get = parent->get; + ufuncs->func = parent->func; /* We can safely copy user_data from parent since we hold a reference * onto it and it's immutable. 
We should not copy the destroy notifiers @@ -193,7 +215,7 @@ hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs) \ void \ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \ - hb_unicode_get_##name##_func_t func, \ + hb_unicode_##name##_func_t func, \ void *user_data, \ hb_destroy_func_t destroy) \ { \ @@ -204,11 +226,11 @@ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \ ufuncs->destroy.name (ufuncs->user_data.name); \ \ if (func) { \ - ufuncs->get.name = func; \ + ufuncs->func.name = func; \ ufuncs->user_data.name = user_data; \ ufuncs->destroy.name = destroy; \ } else { \ - ufuncs->get.name = ufuncs->parent->get.name; \ + ufuncs->func.name = ufuncs->parent->func.name; \ ufuncs->user_data.name = ufuncs->parent->user_data.name; \ ufuncs->destroy.name = NULL; \ } \ @@ -221,13 +243,30 @@ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \ #define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \ \ return_type \ -hb_unicode_get_##name (hb_unicode_funcs_t *ufuncs, \ - hb_codepoint_t unicode) \ +hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \ + hb_codepoint_t unicode) \ { \ - return ufuncs->get.name (ufuncs, unicode, ufuncs->user_data.name); \ + return ufuncs->func.name (ufuncs, unicode, ufuncs->user_data.name); \ } HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE #undef HB_UNICODE_FUNC_IMPLEMENT +hb_bool_t +hb_unicode_compose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose); +} + +hb_bool_t +hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b) +{ + return ufuncs->func.decompose (ufuncs, ab, a, b, ufuncs->user_data.decompose); +} HB_END_DECLS diff --git a/src/hb-unicode.h b/src/hb-unicode.h index e7a20056..9aa97a6b 100644 --- a/src/hb-unicode.h +++ b/src/hb-unicode.h @@ -90,73 +90,103 @@ hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs); /* 
typedefs */ -typedef unsigned int (*hb_unicode_get_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, +typedef unsigned int (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode, void *user_data); -typedef unsigned int (*hb_unicode_get_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, +typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode, void *user_data); -typedef hb_unicode_general_category_t (*hb_unicode_get_general_category_func_t) (hb_unicode_funcs_t *ufuncs, +typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode, void *user_data); -typedef hb_codepoint_t (*hb_unicode_get_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, +typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode, void *user_data); -typedef hb_script_t (*hb_unicode_get_script_func_t) (hb_unicode_funcs_t *ufuncs, +typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode, void *user_data); +typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab, + void *user_data); +typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b, + void *user_data); + /* setters */ void hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs, - hb_unicode_get_combining_class_func_t combining_class_func, + hb_unicode_combining_class_func_t combining_class_func, void *user_data, hb_destroy_func_t destroy); void hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs, - hb_unicode_get_eastasian_width_func_t eastasian_width_func, + hb_unicode_eastasian_width_func_t eastasian_width_func, void *user_data, hb_destroy_func_t destroy); void hb_unicode_funcs_set_general_category_func 
(hb_unicode_funcs_t *ufuncs, - hb_unicode_get_general_category_func_t general_category_func, + hb_unicode_general_category_func_t general_category_func, void *user_data, hb_destroy_func_t destroy); void hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs, - hb_unicode_get_mirroring_func_t mirroring_func, + hb_unicode_mirroring_func_t mirroring_func, void *user_data, hb_destroy_func_t destroy); void hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs, - hb_unicode_get_script_func_t script_func, + hb_unicode_script_func_t script_func, void *user_data, hb_destroy_func_t destroy); +void +hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_compose_func_t compose_func, + void *user_data, hb_destroy_func_t destroy); + +void +hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_decompose_func_t decompose_func, + void *user_data, hb_destroy_func_t destroy); /* accessors */ unsigned int -hb_unicode_get_combining_class (hb_unicode_funcs_t *ufuncs, - hb_codepoint_t unicode); +hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); unsigned int -hb_unicode_get_eastasian_width (hb_unicode_funcs_t *ufuncs, - hb_codepoint_t unicode); +hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); hb_unicode_general_category_t -hb_unicode_get_general_category (hb_unicode_funcs_t *ufuncs, - hb_codepoint_t unicode); +hb_unicode_general_category (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); hb_codepoint_t -hb_unicode_get_mirroring (hb_unicode_funcs_t *ufuncs, - hb_codepoint_t unicode); +hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); hb_script_t -hb_unicode_get_script (hb_unicode_funcs_t *ufuncs, - hb_codepoint_t unicode); +hb_unicode_script (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); +hb_bool_t +hb_unicode_compose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab); +hb_bool_t 
+hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b); HB_END_DECLS diff --git a/test/test-unicode.c b/test/test-unicode.c index c84ba863..a691cb4d 100644 --- a/test/test-unicode.c +++ b/test/test-unicode.c @@ -93,7 +93,7 @@ a_is_for_arabic_get_script (hb_unicode_funcs_t *ufuncs, } else { hb_unicode_funcs_t *parent = hb_unicode_funcs_get_parent (ufuncs); - return hb_unicode_get_script (parent, codepoint); + return hb_unicode_script (parent, codepoint); } } @@ -457,7 +457,7 @@ typedef struct { { \ #name, \ (func_setter_func_t) hb_unicode_funcs_set_##name##_func, \ - (getter_func_t) hb_unicode_get_##name, \ + (getter_func_t) hb_unicode_##name, \ name##_tests, \ G_N_ELEMENTS (name##_tests), \ name##_tests_more, \ @@ -667,8 +667,8 @@ test_unicode_subclassing_nil (data_fixture_t *f, gconstpointer user_data) hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script, &f->data[1], free_up); - g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC); - g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN); + g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC); + g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_UNKNOWN); g_assert (!f->data[0].freed && !f->data[1].freed); hb_unicode_funcs_destroy (aa); @@ -686,8 +686,8 @@ test_unicode_subclassing_default (data_fixture_t *f, gconstpointer user_data) hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script, &f->data[1], free_up); - g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC); - g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN); + g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC); + g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_LATIN); g_assert (!f->data[0].freed && !f->data[1].freed); hb_unicode_funcs_destroy (aa); @@ -714,9 +714,9 @@ test_unicode_subclassing_deep (data_fixture_t *f, gconstpointer user_data) 
hb_unicode_funcs_set_script_func (aa, a_is_for_arabic_get_script, &f->data[1], free_up); - g_assert_cmphex (hb_unicode_get_script (aa, 'a'), ==, HB_SCRIPT_ARABIC); - g_assert_cmphex (hb_unicode_get_script (aa, 'b'), ==, HB_SCRIPT_LATIN); - g_assert_cmphex (hb_unicode_get_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN); + g_assert_cmphex (hb_unicode_script (aa, 'a'), ==, HB_SCRIPT_ARABIC); + g_assert_cmphex (hb_unicode_script (aa, 'b'), ==, HB_SCRIPT_LATIN); + g_assert_cmphex (hb_unicode_script (aa, '0'), ==, HB_SCRIPT_UNKNOWN); g_assert (!f->data[0].freed && !f->data[1].freed); hb_unicode_funcs_destroy (aa); @@ -779,6 +779,9 @@ test_unicode_script_roundtrip (gconstpointer user_data) } +/* TODO test compose() and decompose() */ + + int main (int argc, char **argv) { -- GitLab